1 Raw data

data <- read.csv("/home/bambrozi/2_CORTISOL/Data/T4_Elora_Data_04_25_2024.csv")

# Replace "treated" with NA
data$T4Cortisol[data$T4Cortisol == "treated" | data$T4Cortisol == "Treated at T2" | data$T4Cortisol == "treated at T2"] <- NA
# Convert the column to numeric, coercing non-numeric values to NA
data$T4Cortisol <- as.numeric(as.character(data$T4Cortisol))
#Filtering only the lines with values
data <- data[!is.na(data$T4Cortisol),]
#creating new data file cleaned  
write.csv(data, "/home/bambrozi/2_CORTISOL/Data/data_clean.csv", row.names = F)

print(data)

2 Continuous Phenotype

# Summary Statistics
summary(data$T4Cortisol)
# Histogram
hist(data$T4Cortisol, breaks = 20, main = "Histogram of T4 Cortisol", xlab = "T4 Cortisol")
# Boxplot
boxplot(data$T4Cortisol, main = "Boxplot of T4 Cortisol", ylab = "T4 Cortisol")
# Density Plot
plot(density(data$T4Cortisol), main = "Density Plot of T4 Cortisol", xlab = "T4 Cortisol", ylab = "Density")
# Calculate the theoretical quantiles
qqnorm(data$T4Cortisol, main = "QQ Plot of T4Cortisol", xlim = c(min(qqnorm(data$T4Cortisol)$x), max(qqnorm(data$T4Cortisol)$x)), ylim = c(min(qqnorm(data$T4Cortisol)$y), max(qqnorm(data$T4Cortisol)$y) + 2 * IQR(qqnorm(data$T4Cortisol)$y)))
# Add the QQ line
qqline(data$T4Cortisol, col = "red")

Summary statistics My Image

Histogram My Image

Density My Image

Box_Plot My Image

qq_Plot My Image

Shapiro-Wilk normality test My Image

2.1 Categorical Phenotype

I received from Umesh a e-mail informing the three categories that the animals could be sorted based on their cortisol concentration.

My Image

data$Categorical <- ifelse(data$T4Cortisol >= 956, "H", 
                           ifelse(data$T4Cortisol <= 190.8, "L", "M"))

table(data$Categorical)
library(ggplot2)

# Reorder the levels of the 'Categorical' column
data$Categorical <- factor(data$Categorical, levels = c("L", "M", "H"))

# Create the histogram with reordered categories
ggplot(data, aes(x = Categorical, fill = Categorical)) +
  geom_bar() +
  labs(title = "Histogram of T4Cortisol by Category",
       x = "Category",
       y = "Frequency") +
  theme_minimal()

# Create the histogram
ggplot(data, aes(x = T4Cortisol, fill = Categorical)) +
  geom_histogram(binwidth = 50, color = "black", alpha = 0.7) + # Adjust binwidth as needed
  labs(title = "Histogram of T4Cortisol with Color by Category",
       x = "T4 Cortisol",
       y = "Frequency",
       fill = "Category") +
  scale_fill_manual(values = c("H" = "red", "M" = "blue", "L" = "green")) + # Adjust colors if needed
  theme_minimal()

# Create the density plot
ggplot(data, aes(x = T4Cortisol, fill = Categorical)) +
  geom_density(alpha = 0.3) +
  labs(title = "Density Plot of T4Cortisol with Color by Category",
       x = "T4Cortisol",
       y = "Density",
       fill = "Category") +
  scale_fill_manual(values = c("H" = "red", "M" = "blue", "L" = "green")) + # Adjust colors if needed
  theme_minimal()

# Create a density plot
ggplot(data, aes(x = T4Cortisol)) +
  geom_density() +
  geom_vline(xintercept = c(956, 190.8), linetype = "dashed", color = "red") +
  labs(title = "Density Plot of T4Cortisol with Vertical Lines",
       x = "T4Cortisol",
       y = "Density") +
  theme_minimal()

The animals were sorted in these three categories >H = Hight >M = Medium >L = Low

My Image

The individuals frequency distribution in theese categories are shown in the plots below

My Image My Image My Image My Image

2.2 Removing “outliers”

Observing the previous plots I tried to remove the “outliers” phenotypes above 1250, but the outcome from Shapiro test is still indicating no normality of the data.

library(tidyverse)

data_no_out <- filter(data, T4Cortisol < 1250)

# Create QQ plot
qqnorm(data_no_out$T4Cortisol, main = "QQ Plot of T4Cortisol", xlab = "Theoretical Quantiles", ylab = "Sample Quantiles")
qqline(data$T4Cortisol, col = "red")

boxplot(data_no_out$T4Cortisol, main = "Boxplot of T4 Cortisol", ylab = "T4 Cortisol")

hist(data_no_out$T4Cortisol, breaks = 20, main = "Histogram of T4 Cortisol", xlab = "T4 Cortisol")

shapiro.test(data$T4Cortisol)

My Image My Image My Image My Image

3 Genetic Correlation

To assess the correlation between Cortisol phenotypes and Genomic Estimated Breeding Values (GEBVs), we opt for a linear regression instead of a standard correlation test. This decision is driven by the non-normal distribution of our Cortisol phenotypes, which violates the assumptions required for traditional correlation tests.

Linear regression offers a robust alternative as it does not necessitate normality for the dependent variable. By regressing GEBVs over Cortisol, we can model the relationship between these variables. Our aim is to estimate the regression coefficient, which serves as our correlation estimate.

Due to the violation of normality assumptions for the dependent variable (Cortisol), traditional correlation tests may not provide reliable results, particularly in assessing the significance of the correlation. Therefore, alternative approaches, such as linear regression, are preferred as they do not require the same assumptions about the distribution of the dependent variable. By using linear regression, we can still assess the relationship between Cortisol and GEBVs while accommodating the non-normality of Cortisol phenotypes.

The regression model can be represented as follows: \[ y = \beta_0 + \beta_1 \times GEBV_{\text{Milk}} + \epsilon \]

Where:

This approach enables us to quantify the relationship between Cortisol and GEBVs, addressing the non-normality of Cortisol phenotypes while allowing for formal hypothesis testing of the correlation’s significance.

3.1 Data preparation

The first data I received from Lucas had only 135 animals out of 260 with values the other 125 had only NA I shown this to Lucas Lucas wrote to Alisson Lucas sent me the missing animals I merged this two files

rm(list = ls())

# Load the necessary library
library(dplyr)
library(tidyverse)

cortisol_260 <- read.csv("/home/bambrozi/2_CORTISOL/Data/data_clean.csv")

#This is the first dataframe with information for 135 animals and 125 NA
GEBVs1 <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/ebvs_elora.csv")
#This is the second file with information for the 125 NA animals
GEBVs2 <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/elora_missing_females_2404_06_11_2024.csv")
#This are de columns we can use because we know the meaning of the acronyms
GEBVs_to_use <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/ebv_names_lucas_06102024_BAG.csv")


sum(is.na(GEBVs1$MILK))
GEBVs1<- GEBVs1[which(is.na(GEBVs1[,"DHI_BARN_NAME"]) == F),]

sum(!is.na(GEBVs2$MILK))
GEBVs2<- GEBVs2[which(is.na(GEBVs2[,"DHI_BARN_NAME"]) == F),]

print(GEBVs1$DHI_BARN_NAME)
print(GEBVs2$DHI_BARN_NAME)

# Making the two dataframes with the same columns
# Remove elora_id and international_id from GEBVs1
GEBVs1 <- GEBVs1 %>% select(-elora_id, -international_id)

# Remove ANIMAL_ID from GEBVs2
GEBVs2 <- GEBVs2 %>% select(-ANIMAL_ID)

# Check if the two dataframes have the same columns
have_same_columns <- all(names(GEBVs1) == names(GEBVs2))

if (have_same_columns) {
  print("The dataframes have the same columns.")
} else {
  print("The dataframes do not have the same columns.")
}


# Check if the column names are in the same order
same_order <- identical(names(GEBVs1), names(GEBVs2))

if (same_order) {
  print("The columns are in the same order.")
} else {
  print("The columns are not in the same order.")
}

GEBVs_combined <- rbind(GEBVs1, GEBVs2)

# Sort the columns
sorted_cortisol_260 <- sort(cortisol_260$ID)
sorted_GEBVs_combined <- sort(GEBVs_combined$DHI_BARN_NAME)

# Check if the sorted columns have the same values
identical(sorted_cortisol_260, sorted_GEBVs_combined)

# Create a duplicate of the column 'DHI_BARN_NAME' and name it 'elora_id'
GEBVs_combined$elora_id <- GEBVs_combined$DHI_BARN_NAME

# Assuming GEBVs_combined is your data frame
GEBVs_combined <- GEBVs_combined %>%
  select(elora_id, DHI_BARN_NAME, everything())

write.csv(GEBVs_combined, "/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/ebvs_elora_complete.csv")

# Merging the dataframe with Cortisol values, with the dataframe with GEBVs values
Merg_Cort_GEBVs <- merge(cortisol_260, GEBVs_combined, by.x = "ID", by.y = "elora_id")

write.csv(Merg_Cort_GEBVs, "/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/Merged_Cortisol_GEBVs.csv")

#Opening the file with the GEBVs columns to use
Columns_to_use <- readLines("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/traits_to_use.txt")

colnames(Merg_Cort_GEBVs)[405] <- "IDD"

data <- select(Merg_Cort_GEBVs, ID, T4Cortisol, BIRTH_YEAR, all_of(Columns_to_use))

# The data below has the the 55 GEBVs + Cortisol data + Birth Year
write.csv(data, "/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits.csv")

samp_date2 <- read.csv("/home/bambrozi/2_CORTISOL/Data/Elora animal_ids_kl_sampling_date.csv")

# Convert Sampling_date to Date using as.Date
samp_date$Sampling_date <- as.Date(samp_date$Sampling_date, format = "%m/%d/%Y")

table(samp_date$Sampling_date)

samp_date <- select(samp_date, Elora_id, Sampling_date)

# Check if data$ID and samp_dates$elora_id are identical in values and order
identical(data$ID, samp_date$Elora_id)

data_final <- merge(data, samp_date, by.x="ID", by.y="Elora_id")

data_final <- data_final %>%
  select(ID, T4Cortisol, BIRTH_YEAR, Sampling_date, everything())

# The data below has the the 55 GEBVs + Cortisol data + Birth Year + Sampling data
write.csv(data_final, "/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits2.csv")

ps. I double checked by hand the select and merge process against the original tables received and is everything ok.

3.2 Correlations - Linear Regression

We tested the minimum model, adding only Birth Year, but adding Birth Year and Sampling Date we got the best results.

3.2.1 Adding BIRTH_YEAR and SAMPLING DATE

The regression model added the BY and SAMPLING DATE is shown bellow:

\[ y = \beta_0 + \beta_1 \times GEBV_{\text{Trait}} + BIRTH\_YEAR + SAMPLING\_DATE + \epsilon \]

Where:

  • \(y\) represents Cortisol phenotypes.
  • \(GEBV_{\text{Trait}}\) denotes the GEBV for the specific trait (e.g., Milk Yield).
  • \(BIRTH\_YEAR\) is the birth year of the subjects, included as a factor.
  • \(SAMPLING\_DATE\) is the cortisol sampling date for the subjects, included as a factor.
  • \(\beta_0\) and \(\beta_1\) are the intercept and regression coefficient, respectively.
  • \(\epsilon\) represents the error term capturing unexplained variability.

The SAMPLING_DATE variable is also converted to a factor to account for the categorical nature of sampling date.

# Convert BIRTH_YEAR to a factor and rename
data_final$BIRTH_YEAR <- as.factor(data_final$BIRTH_YEAR)

# Convert Sampling_data to a factor and rename
data_final$Sampling_date <- as.factor(data_final$Sampling_date)

# Initialize a list to store the results
results_list <- list()

# Loop through columns 3 to ncol(data) for the GEBVs
for (i in 5:ncol(data_final)) {
  trait_name <- colnames(data_final)[i]
  
  # Fit the linear regression model with BIRTH_YEAR as an additional predictor
  model <- lm(data_final[[2]] ~ data_final[[i]] + data_final$BIRTH_YEAR + data_final$Sampling_date , data = data_final)
  
  # Summarize the model
  model_summary <- summary(model)
  
  # Extract the desired statistics
  multiple_r_squared <- model_summary$r.squared
  adjusted_r_squared <- model_summary$adj.r.squared
  f_statistic <- model_summary$fstatistic[1] # F-statistic value
  f_num_df <- model_summary$fstatistic[2] # Numerator degrees of freedom
  f_den_df <- model_summary$fstatistic[3] # Denominator degrees of freedom
  p_value <- pf(f_statistic, f_num_df, f_den_df, lower.tail = FALSE) # P-value
  
  # Extract the coefficient and its p-value for the trait
  coef_summary <- coef(model_summary)
  trait_coef <- coef_summary[2, "Estimate"]  # Assumes the trait is the second predictor
  trait_p_value <- coef_summary[2, "Pr(>|t|)"]
  
  # Combine the statistics into a data frame
  result <- data.frame(
    Trait = trait_name,
    Multiple_R_Squared = multiple_r_squared,
    Adjusted_R_Squared = adjusted_r_squared,
    F_Statistic = f_statistic,
    P_Value = p_value,
    Coefficient = trait_coef,
    Coefficient_P_Value = trait_p_value
  )
  
  # Append the result to the results list
  results_list[[i - 2]] <- result
}

# Combine all results into a single data frame
results_df <- do.call(rbind, results_list)

# Save the results to a CSV file
write.csv(results_df, file = "/home/bambrozi/2_CORTISOL/Correlation/Results/add_BY_SAMP/regression_summary_all_traits_BY_SampDt.csv", row.names = FALSE)

Summary statistics for all Traits’ GEBVs adding BIRTY_YEAR and SAMPLING_DATE

Trait Multiple_R_Squared Adjusted_R_Squared F_Statistic P_Value Coefficient Coefficient_P_Value
CO 0.3913617 0.3257757 5.967151 0 -11.1849879 0.0099861
BMR 0.3899307 0.3241904 5.931386 0 14.4612444 0.0135665
LP 0.3858330 0.3196512 5.829896 0 12.2619950 0.0330355
MILK 0.3850052 0.3187343 5.809559 0 0.0671160 0.0396652
PROT 0.3841834 0.3178238 5.789422 0 2.2166395 0.0476301
UT 0.3822859 0.3157218 5.743130 0 -12.1238483 0.0731558
CK 0.3801906 0.3134008 5.692345 0 12.7333709 0.1192726
HHE 0.3795579 0.3126999 5.677076 0 7.4003505 0.1388525
MSP 0.3793006 0.3124149 5.670876 0 -7.2388952 0.1478152
DA 0.3791391 0.3122360 5.666989 0 10.8037510 0.1537699
TU 0.3785722 0.3116080 5.653351 0 -5.2064639 0.1769405
MSL 0.3777098 0.3106526 5.632656 0 -8.6230634 0.2203509
BQ 0.3775224 0.3104450 5.628166 0 -7.7284891 0.2313766
IH 0.3772456 0.3101385 5.621542 0 -4.7354916 0.2488963
ST 0.3767980 0.3096426 5.610839 0 -9.9721644 0.2808121
LOC 0.3766890 0.3095218 5.608233 0 -7.2367609 0.2893509
FAT 0.3765584 0.3093772 5.605116 0 0.8520581 0.3000062
UD 0.3763515 0.3091480 5.600177 0 -9.3297651 0.3179533
CA 0.3757417 0.3084725 5.585641 0 6.0756898 0.3798799
MS 0.3755038 0.3082090 5.579979 0 -6.0375749 0.4085912
SCK 0.3754363 0.3081342 5.578372 0 -4.4161473 0.4173165
SCS 0.3754033 0.3080976 5.577587 0 3.9080681 0.4216769
IDD 0.3752677 0.3079474 5.574362 0 3.7029601 0.4403519
FOOT 0.3751830 0.3078536 5.572349 0 17.3309360 0.4526548
TL 0.3750803 0.3077398 5.569908 0 -6.6585204 0.4683140
MET 0.3749055 0.3075462 5.565755 0 -3.4014191 0.4970656
CTFS 0.3747600 0.3073850 5.562300 0 4.0557716 0.5233387
CONF 0.3746137 0.3072229 5.558828 0 -4.7896643 0.5523352
FE 0.3745078 0.3071056 5.556316 0 -2.9375586 0.5752627
HL 0.3743988 0.3069849 5.553731 0 3.4274416 0.6009102
FA 0.3742870 0.3068610 5.551080 0 -3.2821641 0.6298652
DD 0.3742836 0.3068573 5.551001 0 2.5402141 0.6307730
MOB 0.3742556 0.3068263 5.550337 0 3.0592146 0.6385442
FTP 0.3742359 0.3068045 5.549870 0 4.8583743 0.6441419
BCS 0.3741681 0.3067293 5.548263 0 2.6692352 0.6643615
HH 0.3740753 0.3066266 5.546066 0 2.0230249 0.6947785
DF 0.3740392 0.3065865 5.545209 0 2.3681806 0.7076996
FL 0.3739824 0.3065237 5.543865 0 -2.1711035 0.7294613
UF 0.3739605 0.3064994 5.543347 0 2.8880683 0.7384413
MDR 0.3738855 0.3064163 5.541571 0 1.8495373 0.7722558
WL 0.3738548 0.3063822 5.540843 0 1.2643028 0.7878801
AFS 0.3738293 0.3063540 5.540239 0 1.5814380 0.8018670
RUM 0.3738023 0.3063241 5.539601 0 -1.2574561 0.8179113
SH 0.3738015 0.3063233 5.539583 0 1.0342843 0.8184040
BD 0.3736958 0.3062061 5.537080 0 0.7613199 0.9071017
MR 0.3736957 0.3062060 5.537078 0 0.6291752 0.9071898
SU 0.3736871 0.3061965 5.536876 0 0.4745538 0.9186634
FEED 0.3736818 0.3061907 5.536751 0 0.3944746 0.9266753
DHL 0.3736774 0.3061858 5.536647 0 -0.5919776 0.9340753
DO 0.3736757 0.3061838 5.536604 0 0.5220723 0.9373261
DS 0.3736695 0.3061769 5.536458 0 0.3979132 0.9502668
CW 0.3736676 0.3061749 5.536414 0 0.3562368 0.9547808
ME 0.3736658 0.3061729 5.536370 0 -0.2401976 0.9599012
MT 0.3736595 0.3061659 5.536223 0 -0.0976386 0.9882642
DCA 0.3736593 0.3061657 5.536217 0 0.0802555 0.9906432
Fitting Birth_Year and Sampling_date to the model these are the traits with significant correlation (<0.15):
  • CO = Cystic ovaries
  • BMR = Body Maintenance Requirements
  • LP = Lactation persistency
  • MILK = Milk yield
  • PROT = Protein yield
  • UT = Udder Texture
  • CK = Clinical Ketosis
  • HHE = Heel Horn Erosion
  • MSP = Milking Speed

4 GENOTYPES

Lucas Alcântara sent me the path to the genotype and pedigree files: /data/cgil/daiclu/6_Data/database/public_output/bruno

My Image

In this folder we found the following files:

I made a copy of this files in a folder called Raw_files:

/home/bambrozi/2_CORTISOL/RawFiles

This directory has two sub-directories:

5 PHENOTYPE file

library(data.table)

pheno <- read.csv("/home/bambrozi/2_CORTISOL/Data/data_clean.csv")
ped <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv")
geno <- fread("/home/bambrozi/2_CORTISOL/Geno_files/genoplink.ped")
geno <- geno[,c("V2")]

#Bringing cdn_id to my phenotype file
#Generate a index with the match
matching_indices <- match(pheno$ID, ped$elora_id)
# Then, assign 'cdn_id' from 'ped' to 'pheno' where there are matches
pheno$cdn_id <- ifelse(!is.na(matching_indices), ped$cdn_id[matching_indices], NA)

#Making a phenotype file only with genotyped animals
pheno_genotyped <- pheno[pheno$cdn_id %in% geno$V2,] 

#check if all animals in this file are genotyped
checkk <- pheno_genotyped$cdn_id %in% geno$V2
sum(checkk)

#The phenotype file should have three columns: FID, Animal_id, Phenotype
HO <- rep("HO", 252)

pheno_gwas <- as.data.frame(cbind(HO, pheno_genotyped$cdn_id, pheno_genotyped$T4Cortisol))

colnames(pheno_gwas) <- c("FID", "cdn_id", "cortisol")

pheno_gwas$cdn_id <-  as.numeric(pheno_gwas$cdn_id)
pheno_gwas$cortisol <- round(as.numeric(pheno_gwas$cortisol),2)

write.table(pheno_gwas, "/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt", quote = F, row.names = F, col.names = T)

6 SNP MAP

Adjusting the SNP_map to .map

map <- fread("/data/cgil/daiclu/6_Data/database/public_output/bruno/DGVsnpinfo.2404.ho")
morgan <- data.frame(X0 = rep(0, 45101))
mapa=as.data.frame(cbind(map$chr, map$snp_name, morgan$X0, map$location))
head(mapa)
write.table(x = mapa, file = "/home/bambrozi/2_CORTISOL/Geno_files/genoplink.map", row.names = FALSE, col.names = FALSE, sep = "\t", quote = FALSE)

6.1 Generating the bfiles

system("/home/local/bin/plink --cow --nonfounders --allow-no-sex --keep-allele-order --file /home/bambrozi/2_CORTISOL/Geno_files/genoplink --make-bed --out /home/bambrozi/2_CORTISOL/Geno_files/genoplink")
With the code above I generated the bfiles:
    genoplink.bed
  • genoplink.bim
  • genoplink.fam
  • genoplink.log
  • genoplink.nosex

7 Quality Control

We ran the code below to perfom the QC okok

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/Geno_files/genoplink
RESULT=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean

/home/local/bin/plink \
    --bfile ${DATA} \
    --cow \
    --allow-no-sex \
    --hwe 1e-5 \
    --maf 0.01 \
    --geno 0.1 \
    --mind 0.1 \
    --keep-allele-order \
    --make-bed \
    --out ${RESULT}
    

The server screen outcome is shown below. My Image

After the Quality Control we ended up with

8 KING

To check for duplicated individuals I performed the KINSHIP analysis using one script from Larissa Braga. Running the King Analysis on Plink. okok

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
RESULT=/home/bambrozi/2_CORTISOL/Geno_files_after_KING/result_king

plink2 \
    --bfile ${DATA} \
    --cow \
    --make-king-table \
    --out ${RESULT}

This is the output screen on terminal:

My Image

The table below is the output /home/bambrozi/2_CORTISOL/Geno_files_after_KING/result_king.kin0 and have pairwise comparisons between all individuals.

My Image

Now we should open in R and check for individuals with more than 0,354, to perform this we can use the code below, also provided by Larissa Braga: okok

setwd("/home/bambrozi/2_CORTISOL/Geno_files_after_KING")

relatedness="result_king.kin0" ## change accordingly!!

library(data.table)

print(relatedness)
rel=fread(relatedness, h = T)
head(rel)

print("Individuals with different identifications above the cut off of 0.354:")
dup=subset(rel, KINSHIP >= 0.354  & IID1!=IID2)
print(dup)
nrow(dup)

So the code above will provide this output if there is no duplicated individual.

My Image

We do not have any duplicated individual

So the file to be used are those in the directory /home/bambrozi/2_CORTISOL/Geno_files_after_QC

files:genoplink_clean

After Quality Control we didn’t lost any animal, so we don’t need to update our phenotype file

9 PCA

Now before performing the PCA analysis we need to change the FID for those individuals that has phenotype = 1 for Nadia.

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
RESULT=/home/bambrozi/2_CORTISOL/PCA/pca_result

plink \
    --bfile ${DATA} \
    --keep-allele-order \
    --cow \
    --pca \
    --out ${RESULT}

The PCA output:

My Image

9.1 PCA Plot

After generate the Eigenvalues and Eigenvectors I used the code below to generate the PCA Plot

setwd("/home/bambrozi/2_CORTISOL/PCA")

library(ggplot2) 
library(tidyverse)

##
# Visualize PCA results
###

# read in result files
eigenValues <- read_delim("pca_result.eigenval", delim = " ", col_names = F)
eigenVectors <- read_delim("pca_result.eigenvec", delim = " ", col_names = F)

## Proportion of variation captured by each vector
eigen_percent <- round((eigenValues / (sum(eigenValues))*100), 2)


# PCA plot
png("pca-plink.eng.png", width=5, height=5, units="in", res=300)
ggplot(data = eigenVectors) +
  geom_point(mapping = aes(x = X3, y = X4), color = "red", shape = 19, size = 1, alpha = 1) +
  geom_hline(yintercept = 0, linetype="dotted") +
  geom_vline(xintercept = 0, linetype="dotted") +
  labs(x = paste0("Principal component 1 (", eigen_percent[1,1], " %)"),
       y = paste0("Principal component 2 (", eigen_percent[2,1], " %)")) +
  theme_minimal()
dev.off()


# PCA plot with animal ids
png("pca-plink.eng.animals_id.png", width=50, height=50, units="in", res=300)
ggplot(data = eigenVectors) +
  geom_point(mapping = aes(x = X3, y = X4), color = "red", shape = 19, size = 5, alpha = 1) +
  geom_text(mapping = aes(x = X3, y = X4, label = X2), size = 2, hjust = 0, vjust = 0) +  # Add labels for animal IDs
  geom_hline(yintercept = 0, linetype="dotted") +
  geom_vline(xintercept = 0, linetype="dotted") +
  labs(x = paste0("Principal component 1 (", eigen_percent[1,1], " %)"),
       y = paste0("Principal component 2 (", eigen_percent[2,1], " %)")) +
  theme_minimal()
dev.off()

My Image

10 GWAS on GCTA

Previously we have performed GWAS on GCTA:

10.1 GWAS - EXTREME PHENO - WITH BY and SD

10.1.1 Data preparation

I received from Umesh a e-mail informing the three categories that the animals could be sorted based on their cortisol concentration.

My Image

pheno <- read.table("/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt", header = T)
data_final <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits2.csv", header = T)
ids_eq <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv", header = T)


# Create an matrix with fixed effects with only those animals which also have phenotype and genotype
data_final$cdn_id <- ids_eq$cdn_id[match(data_final$ID, ids_eq$elora_id)]
fixeff <- data_final[,c("ID", "BIRTH_YEAR", "Sampling_date", "cdn_id")]
fixeff <- fixeff[fixeff$cdn_id %in% pheno$cdn_id, ]
fixeff$FID <- "HO"
fixeff <- fixeff[, c("FID", "cdn_id", "BIRTH_YEAR", "Sampling_date")]

# Now we are gona remove the intermediary animals from pheno object
pheno$Categorical <- ifelse(pheno$cortisol >= 956, "H", 
                           ifelse(pheno$cortisol <= 190.8, "L", "M"))
table(pheno$Categorical)
pheno <- pheno[pheno$Categorical != "M", ]
pheno <- pheno[, c("FID", "cdn_id", "cortisol")]

# Now we are going to remove from fixeff the animals which are not in pheno
fixeff <- fixeff[fixeff$cdn_id %in% pheno$cdn_id,]

#Checking if match the animals and order
identical(fixeff$cdn_id, pheno$cdn_id)

#Creating a file with animals to keep in the genotype file, we will use it on Plink
to_keep_geno <- pheno[, c("FID", "cdn_id")]

write.table(fixeff, "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/fixeff.txt", quote = F, row.names = F, col.names = T)
write.table(pheno, "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/pheno.txt", quote = F, row.names = F, col.names = T)
write.table(to_keep_geno, "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/to_keep_geno.txt", quote = F, row.names = F, col.names = F)

We ended up with
H (Hight) = 34 animals
L (Low) = 37 animals Total = 71 animals

On Plink we will remove all individuals from genotype files that are classified as Medium, keeping only the High and Low

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
RESULT=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/geno_extreme
KEEP=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/to_keep_geno.txt

plink2 --bfile ${DATA} --keep ${KEEP} --chr-set 29 --make-bed --out ${RESULT}

10.1.2 GWAS on GCTA

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/geno_extreme
RESULT=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/GWAS_result
PHENO=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/pheno.txt
FIXEFF=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/fixeff.txt

/home/local/bin/gcta \
    --bfile ${DATA} \
    --mlma-loco \
    --pheno ${PHENO} \
    --qcovar ${FIXEFF} \
    --autosome-num 29 \
    --autosome \
    --out ${RESULT}

After ran the GWAS above I got the following message from the GCTA:

Error: analysis stopped because more than half of the variance components are constrained. The result would be unreliable. You may have a try of the option –reml-no-constrain.

As we got this error message, we needed to solve this problem, and for that we used the whole sample size (252 individuals) to estimate the variance components, and after this, using this variance components from the whole sample we performed the ssGWAS with the subset of individuals (34 High + 37 Low = 71), but this was not possible in GCTA so we switched to another software (BLUPF90+)

11 GWAS BLUPF90+

To run ssGWAS on Blupf90+ suite, we will need 4 different softwares:

The tutorial for preGSF90 and postGSF90 we can find in the link bellow https://nce.ads.uga.edu/wiki/doku.php?id=readme.pregsf90#gwas_options_postgsf90

According to the BLUPF90+ tutorial:

ssGWAS is an iterative procedure with 2 steps:
0) Quality control
1) prediction of GEBV with ssGBLUP
2) prediction of SNP marker effects based on the GEBV

11.1 Files preparation

Preparing files to run Variance components estimation using REML with AI (Average Information) algorithm.

First you need to create a directory in your home directory, prepare and save the following files in:

  • Phenotype and Fixed effects file
  • Pedigree file
  • Genotype file
  • BlupF90+ executable file
  • RenumF90 executable file
  • preGSf90 executable file
  • postGSf90 executable file
  • Parameter file

      11.1.1 Phenotype and Fixed effects file

      The appearance of this file is like this:

      My Image

      FIRST COLUMN = Animal ID
      SECOND COLUMN = Phenotype
      THIRD COLUMN = Fixed Effect 1
      FOURTH COLUMN = Fixed Effect 2

      First we are going to generate a Phenotype_Fixed_Effect file with the whole sample (252 individuals) that we are going to use for the Variance Components Estimation.

      To get in one file these four columns we need the following code:

      okok

      pheno <- read.table("/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt", header = T)
      data_final <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits2.csv", header = T)
      ids_eq <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv", header = T)
      
      
      # Create data_final$cdn_id by matching IDs with elora_id
      data_final$cdn_id <- ids_eq$cdn_id[match(data_final$ID, ids_eq$elora_id)]
      fixeff <- data_final[,c("ID", "BIRTH_YEAR", "Sampling_date", "cdn_id")]
      fixeff <- fixeff[fixeff$cdn_id %in% pheno$cdn_id, ]
      fixeff$FID <- "HO"
      fixeff <- fixeff[, c("FID", "cdn_id", "BIRTH_YEAR", "Sampling_date")]
      
      identical(fixeff$cdn_id, pheno$cdn_id)
      
      fixeff <- fixeff[, c("cdn_id", "BIRTH_YEAR", "Sampling_date")]
      pheno <- pheno[,c("cdn_id", "cortisol")]
      
      # Load necessary libraries
      library(dplyr)
      
      # Merge pheno and fixeff data frames
      merged_data <- pheno %>% 
        left_join(fixeff, by = "cdn_id")
      
      
      merged_data$iid <- ids_eq$iid[match(merged_data$cdn_id, ids_eq$cdn_id)]
      
      merged_data <- merged_data[, c("iid", "cortisol", "BIRTH_YEAR", "Sampling_date")]
      
      write.table(merged_data, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/fenofix.txt", col.names = F, quote = F, row.names = F)

      The file should be saved as text file, with separation by space and no columns names.

      PS: if there are any NA, they sould be replaced by 9999

      /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/fenofix.txt (this file has 252 individuals)

      11.1.2 Pedigree file

      The appearance of this file is like this:

      My Image

      FIRST COLUMN = Animal ID
      SECOND COLUMN = Sire ID
      THIRD COLUMN = Dam ID

      The file should be saved as text file, with separation by space and no columns names.

      We used the code below to remove the commas of a .csv file to a file with sepation by spaces.

      /home/bambrozi/2_CORTISOL/RawFiles/Pedigree okok

      # to replace comma for space in the .csv file with the equivalence among IDs
      # /home/bambrozi/2_CORTISOL/RawFiles/Pedigree
      sed 's/,/ /g' bruno_ped.iid.csv > bruno_ped_iid_blup.txt

      11.1.3 Genotype file

      First we are going to generate a Genotype file with the whole sample (252 individuals) that we are going to use for the Variance Components Estimation.

      The appearance of this file is like this:

      My Image

      FIRST COLUMN = Animal ID SECOND COLUMN = Genotypes (0, 1 and 2 format)

      The file should be saved as text file, with separation by space and no columns names.

      We used the code below to replace the cid for iid. First we merge using the second column of the firs file, and the first column of the second file. Then we use again the command awk to keep only the third and fifth columsn and sabe in a different object.

      /home/bambrozi/2_CORTISOL/RawFiles/Genotypes

      # Using the awk function to merge the two files and the second awk to select only the 3rd and 5fh columns
      awk 'FNR==NR {a[$2]=$0; next} {print a[$1], $0}' bruno_ids.csv bruno_gntps.txt | awk '{print$3,$5}' > bruno_gntps_iid

      Below we can find the file’s location from the code above: /home/bambrozi/2_CORTISOL/RawFiles/Genotypes/bruno_gntps.txt /home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/bruno_gntps_iid (this file has 252 individuals)

      11.1.4 Download the executable files

      Download from this website https://nce.ads.uga.edu/html/projects/programs/Linux/64bit/:
      • BlupF90+ = we will use to estimate the Variance components and GEBVs
      • renumF90 = we will use to renumerate the files
      • preGSf90 = we will use to perform the Quality control
      • postGSf90 = we will use for GWAS

      11.1.5 SNP MAP

      /home/bambrozi/2_CORTISOL/RawFiles/SNP_map okok

      mapfile <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/DGVsnpinfo.2404.ho", header = T)
      
      colnames(mapfile)
      
      colnames(mapfile) <- c("SNP_ID", "CHR", "POS")
      
      mapfile <- mapfile[,c("CHR", "POS", "SNP_ID")]
      
      write.table(mapfile, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/snpmap.txt", col.names = T, row.names = F, quote = F)

11.2 Variance component estimation

But, before running the GEBV we will first perform one additional step to “CLEAN” our genotypes. Actually BLUPF90 by default perform a data cleaning with pre set parameters, but we will change some default parameters an so perform this additional step.

The Parameter card for this step is the parameter bellow:

/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/renum_QC.par

okok


DATAFILE
fenofix.txt
TRAITS
2
FIELDS_PASSED TO OUTPUT

WEIGHT(S)

RESIDUAL_VARIANCE
1.0
EFFECT
3 cross numer
EFFECT
4 cross alpha
EFFECT
1 cross alpha
RANDOM
animal
FILE
bruno_ped_iid_blup.txt
FILE_POS
1 2 3 0 0
SNP_FILE
bruno_gntps_iid
PED_DEPTH
0
(CO)VARIANCES
1.0
OPTION outcallrate
OPTION saveCleanSNPs
OPTION minfreq 0.01
OPTION map_file snpmap.txt
OPTION excludeCHR 30 31
  • DATAFILE: bellow this line you need to inform the name of the file with phenotype and fixed effects. As before running BLUPF90 on server you are going to direct the terminal to the directory where all these files are placed you only need to inform the name.
  • TRAITS: below this line you need to inform which column are the phenotype date in the previous file, in this example, 2.
  • FIELDS_PASSED TO OUTPUT:
  • WEIGHT (S):
  • RESIDUAL VARIANCE: for the firs run you need to inform the value of 1.0, for the second you can pick the variance from the firs run’s output.
  • EFFECT: you will inform your first effect, in this example, Birth Year, which is in the column 3, and the word cross numer because is a number.
  • EFFECT: you should provide the next effect, in this example, sample date, as sample date has one non numeric character you should inform as cross alpha, in this example column 4.
  • EFFECT: now I’m providing my animal ID information, in this example column 1, and again cross alpha because has number and letters in the ID. I’m also informing that this effect is RANDOM, and that is my animal effect.
  • FILE: bellow this line I need to provide the pedigree file. Again, as I’m already in the directory which contain the pedigree file I only need to provide the file name.
  • FILE-POS: Here I’ll inform which columns should be considered in the pedigree file, in this situation, 1 2 3 0 0.
  • PED_DEPTH: Now we can inform the depth we want the software considers the pedigree, or if we leave 0 it will the maximum possible.
  • (CO) VARIANCES: Here you should provide the Variance/Co-variance matrix, like as for residual variance in the first run we set up to 0 in this example that we don´t have to imagine any co-variance, but if you know that exist variance among you effects you shoul set up XXX for ….
  • OPTION outcallrate: Save the call rate information on SNP markers in the file.
  • OPTION saveCleanSNPs: This option generates 4 new files. We assume snpfile as a marker file.
    • snpfile_clean = new SNP marker file.
    • snpfile_clean_XrefID = new cross-reference file.
    • snpfile_SNPs_removed = a list of removed markers.
    • snpfile_Animals_removed = a list of removed animals.
  • OPTION minfreq 0.01: Minimum allele frequency to retain the marker.
  • OPTION map_file snpmap.txt: This option will upload the SNP MAP
  • OPTION excludeCHR 30 31: This option will remove sexual chromosome that is the 30 and 31

To run any softwere from Blupf90 suit we will perform always in this way:

  1. Go to the server you wanna run this analysis, for instance, grand
ssh grand
  1. Now go to the directory you have created to run this analysis where that set of files are placed.
cd /home/bambrozi/2_CORTISOL/GWAS/BLUPF90
  1. Make the renumF90 and BlupF90+ files executables
chmod +x renumf90
chmod +x blupf90+
  1. Run renumF90
./renumf90

When you run the code above, it will ask you the name of your parameter card, for this step is renum_QC.par.

The command above will generate couple files, among them renf90.par

We modified renf90.par in:
  • renf90_DataClean.par
  • renf90_VarCompEst.par

11.2.1 Quality control

The parameter card to perform the Quality Control is: /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/renf90_DataClean.par

It will run using the software pre preGS90 to generate the Clean Genotype and SNP_MAP files after Quality Control. okok

# BLUPF90 parameter file created by RENUMF90
DATAFILE
 renf90.dat
NUMBER_OF_TRAITS
           1
NUMBER_OF_EFFECTS
           3
OBSERVATION(S)
    1
WEIGHT(S)
 
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
 2         4 cross 
 3        23 cross 
 4      3724 cross 
RANDOM_RESIDUAL VALUES
   1.0000    
 RANDOM_GROUP
     3
 RANDOM_TYPE
 add_an_upginb
 FILE
renadd03.ped                                                                    
(CO)VARIANCES
   1.0000    
OPTION SNP_file bruno_gntps_iid
OPTION outcallrate
OPTION saveCleanSNPs
OPTION minfreq 0.01
OPTION map_file snpmap.txt
OPTION excludeCHR 30 31

11.2.2 VCE

The second parameter card used for Variance Components Estimation (VCE) is the following: /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/renf90_VarCompEst.par

It will be run using the software pre blupf90+ to generate the VCE. okok

# BLUPF90 parameter file created by RENUMF90
DATAFILE
 renf90.dat
NUMBER_OF_TRAITS
           1
NUMBER_OF_EFFECTS
           3
OBSERVATION(S)
    1
WEIGHT(S)
 
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
 2         4 cross 
 3        23 cross 
 4      3724 cross 
RANDOM_RESIDUAL VALUES
   1.0000    
 RANDOM_GROUP
     3
 RANDOM_TYPE
 add_an_upginb
 FILE
renadd03.ped                                                                    
(CO)VARIANCES
   1.0000    
OPTION SNP_file bruno_gntps_iid_clean
OPTION no_quality_control
OPTION method VCE
OPTION origID
OPTION missing 9999
OPTION se_covar_function H2_1 g_3_3_1_1/(g_3_3_1_1+r_1_1)
  • OPTION SNP_file bruno_gntps_iid_clean: we are going to inform the genotype file generated in the previous step (the Quality Control). Blup will create an file with the same name that the original genotype file, and add the sufix **_clean**
  • OPTION no_quality_control we need to set up this option because we performed Quality Control in the previous step and now we don’t need that Blupf90+ perform again. Blupf90+ by default perform quality control, so if we do not want, we need to specify.
  • OPTION method: VCE (Variance Component Estimation).
  • OPTION OrigID: this will keep the original ID informed.
  • OPTION missing 9999: you are informing that missing values will appear as 9999
  • OPTION se_covar_function: H2_1 g_3_3_1_1/(g_3_3_1_1+r_1_1)
    • H2_1: the name that your function will appear on the output files.
    • g_3_3: you are asking for genetic variance estimation for the 3rd informed effect.
    • **_1_1**: this effect is in the 1st column.
    • /(g_3_3_1_1+r_1_1): to get the total phenotipic variance, you are summing to genetic variance the residual variance of the effect in column 1.

In the parameter card above, we remove the option for Quality Control and added options for Variance Components Estimation, for Missing data, for origID and for heritability estimation, but the MOST IMPORTANT PART is we need to change the OPTION SNP_FILE, replacing the original genotype file, for the “clean” version generated in the previous step.

The Variance Components will be placed in the file: /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/blupf90.log

blupf90.log
My Image

Now you should update you renf90_VarCompEst.par file with these informations from the .log file

Copy Residual Variance from blupf90.log and will paste on renf90_VarCompEst.par RANDOM_RESIDUAL_VALUES Copy Genetic variance for effect x from blupf90.log and will paste on renf90_VarCompEst.par (CO) VARIANCE

run blupf90+ again with the updated renf90_VarCompEst.par

If the Residual Variance and Genetic variance for effect x didn’t change in your blupf90.log the analysis ended, but if this value vary, you should update again the renf90.par and run again blupf90+ until this values don’t change more.

blupf90.log
My Image

Parameter card
My Image

Now that we have the Variance components we go for the next step:
  • Prediction of SNP marker effects based on the GEBV
  • GWAS for High and Low cortisol animals
To do this, I’ll:
  • Keep the Phenotype_FixEff file from the previous analysis with records for 252 individuals
  • generate the genotype file like bellow
  • Perform the QUALITY CONTROL for the new (71 samples) genotype file
  • Add the VCE from the 252 data set in the parameter card
  • Generate the GEBV
  • Run GWAS

11.3 Prediction of SNP marker effects based on the GEBV

11.3.0.1 GWAS for High and Low cortisol animals

Now we’ll run a new analysis using the Variance Components Estimation from the previous step to perform the GWAS.

To perform this first we need a a Genotype file only with the 71 animals (High=34 and Low=37)

11.3.0.1.1 Updating the files
11.3.0.1.1.1 Phenotype and Fixed Effects files

In this read_me file we performed a ssGWAS but keeping the phenotype and fixed effect information for all 252 individuals, only reducing the number of individuals for the Genotype file..

BUT we are going to used the code bellow to remove individuals with MEDIUM cortisol Phenotype just to use as base to get the genotype file in the next step.

phenofix <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/fenofix.txt")

# Now we are gona remove the intermediary animals from pheno object
phenofix$Categorical <- ifelse(phenofix$V2 >= 956, "H", 
                               ifelse(phenofix$V2 <= 190.8, "L", "M"))
table(phenofix$Categorical)

phenofix <- phenofix[phenofix$Categorical != "M", ]

phenofix <- phenofix[, c("V1", "V2", "V3", "V4")]

write.table(phenofix, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/fenofix.txt", col.names = F, row.names = F, quote = F)
11.3.0.1.1.2 Genotype files

I used this command line bellow to remove the individuals with MEDIUM phenotypes. okok

awk 'NR==FNR{ids[$1]; next} $1 in ids' /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/fenofix.txt /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/bruno_gntps_iid > /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/bruno_gntps_iid_71

11.3.0.2 Running renum_QC.par

Run with renumf90

/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/renum_QC.par

okok

DATAFILE
fenofix.txt
TRAITS
2
FIELDS_PASSED TO OUTPUT

WEIGHT(S)

RESIDUAL_VARIANCE
77182
EFFECT
3 cross numer
EFFECT
4 cross alpha
EFFECT
1 cross alpha
RANDOM
animal
FILE
bruno_ped_iid_blup.txt
FILE_POS
1 2 3 0 0
SNP_FILE
bruno_gntps_iid_71
PED_DEPTH
0
(CO)VARIANCES
28212
OPTION outcallrate
OPTION saveCleanSNPs
OPTION minfreq 0.01
OPTION map_file snpmap.txt
OPTION excludeCHR 30 31
We modified renf90.par in 3 copies:
  • renf90_DataClean.par
  • renf90_ssGWAS1_Ginv.par
  • renf90_ssGWAS2_SNPeff.par

11.3.0.3 Running renf90_DataClean.par for Quality Control

To run the parameter card bellow we are going to use the software presGSf90 /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/renf90_DataClean.par

okok

# BLUPF90 parameter file created by RENUMF90
DATAFILE
 renf90.dat
NUMBER_OF_TRAITS
           1
NUMBER_OF_EFFECTS
           3
OBSERVATION(S)
    1
WEIGHT(S)
 
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
 2         4 cross 
 3        23 cross 
 4      3724 cross 
RANDOM_RESIDUAL VALUES
   77182.    
 RANDOM_GROUP
     3
 RANDOM_TYPE
 add_an_upginb
 FILE
renadd03.ped                                                                    
(CO)VARIANCES
   28212.    
OPTION SNP_file bruno_gntps_iid_71
OPTION outcallrate
OPTION saveCleanSNPs
OPTION minfreq 0.01
OPTION map_file snpmap.txt
OPTION excludeCHR 30 31

11.3.0.4 Running renf90_ssGWAS1_Ginv.par for Ginv estimation

May be necessary to run the command bellow on the server Setting the stack size to “unlimited” allows the program to allocate memory for these large structures without hitting stack limits. By removing stack size limits, BLUPF90 is less likely to encounter segmentation faults or memory allocation issues that arise when the stack space is insufficient for the computations needed.

ulimit -s unlimited

The parameter card bellow we are going to run using the software blupf90+: /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/renf90_ssGWAS1_Ginv.par

okok

# BLUPF90 parameter file created by RENUMF90
DATAFILE
 renf90.dat
NUMBER_OF_TRAITS
           1
NUMBER_OF_EFFECTS
           3
OBSERVATION(S)
    1
WEIGHT(S)
 
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
 2         4 cross 
 3        23 cross 
 4      3724 cross 
RANDOM_RESIDUAL VALUES
   77182.    
 RANDOM_GROUP
     3
 RANDOM_TYPE
 add_an_upginb
 FILE
renadd03.ped                                                                    
(CO)VARIANCES
   28212.    
OPTION SNP_file bruno_gntps_iid_71_clean
OPTION no_quality_control
OPTION origID
OPTION missing 9999
OPTION saveGInverse
OPTION saveA22Inverse
OPTION snp_p_value

11.3.0.5 Running renf90_ssGWAS2_SNPeff.par for GWAS

The parameter card bellow we are going to run using the software postGSf90: /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/renf90_ssGWAS2_SNPeff.par

okok

# BLUPF90 parameter file created by RENUMF90
DATAFILE
 renf90.dat
NUMBER_OF_TRAITS
           1
NUMBER_OF_EFFECTS
           3
OBSERVATION(S)
    1
WEIGHT(S)
 
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
 2         4 cross 
 3        23 cross 
 4      3724 cross 
RANDOM_RESIDUAL VALUES
   77182.    
 RANDOM_GROUP
     3
 RANDOM_TYPE
 add_an_upginb
 FILE
renadd03.ped                                                                    
(CO)VARIANCES
   28212.    
OPTION SNP_file bruno_gntps_iid_71_clean
OPTION origID
OPTION no_quality_control
OPTION readGInverse
OPTION readA22Inverse
OPTION map_file snpmap.txt_clean
OPTION snp_p_value
OPTION Manhattan_plot_R
OPTION Manhattan_plot
This code will generate several files, among them:
  • chrsnp_pval:
    • Column 1: trait
    • Column 2: effect
    • Column 3: -log10(p-value)
    • Column 4: SNP
    • Column 5: Chromosome
    • Column 6: Position in bp
  • Pft1e3.R: a r-code to generate the Manhattan plot in R using the chrsnp_pval

11.3.0.6 Manhattan Plots for BLUPF90 GWAS

11.3.0.6.1 Genome Independent Segment

To make the Manhattan Plot considering Genome Independent Segment we should run the code bellow. This code has part of the code in the file Pft1e3.R

Genome_Assembly_ARS_UCD_1_2 <- read_tsv("/home/bambrozi/2_CORTISOL/GWAS/sequence_report_ARS-UCD1_2.tsv")

library(dplyr)
# Filter the rows and sum the Seq length column
# Assuming your data frame is named Genome_Assembly_ARS_UCD_1_2
L <- Genome_Assembly_ARS_UCD_1_2 %>%
  filter(`UCSC style name` %in% paste0("chr", 1:29)) %>%
  summarise(total_length = sum(`Seq length`)) %>%
  pull(total_length)

# Converting bases to Morgan (1Mb = 1cM (0,01 Morgan))
L_M <- L/10^8

# The Ne measure is based on the article bellow:
Ne <- 66 #(Makanjoula et al., 2020)

NeL <- Ne*L_M

# This is the number of independent segment in the genome.
Me <- (2*NeL)/log10(NeL)


# Calculate Bonferroni threshold (already done)
bonf <- -log10(0.05 / Me)


setwd("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno")
# Read in and process data for Manhattan plot
yyy1 <- read.table("chrsnp_pval")
yyy <- yyy1[order(yyy1$V4), ]
zzz <- yyy[which(yyy$V1 == 1 & yyy$V2 == 3), ]
n <- nrow(zzz)
y <- zzz[, 4]
x <- zzz[, 3]
chr1 <- zzz[, 5]
chr <- NULL
pos <- NULL

for (i in unique(yyy$V5)) {
  zz <- yyy[yyy$V5 == i, ]
  key <- zz$V4
  medio <- round(nrow(zz) / 2, 0)
  z <- key[medio]
  pos <- c(pos, z)
}

chrn <- unique(yyy$V5)
one <- which(chr1 %% 4 == 0)
two <- which(chr1 %% 4 == 1)
three <- which(chr1 %% 4 == 2)
four <- which(chr1 %% 4 == 3)
chr[one] <- "darkgoldenrod"
chr[two] <- "darkorchid"
chr[three] <- "blue"
chr[four] <- "forestgreen"

# Create Manhattan plot with Bonferroni line and legend
png(file = "Pft1e3_manplot_with_bonf_ind_seg.png", width = 20000, height = 10000, res = 600)
par(mfrow = c(1, 1), family = "sans", cex = 1.5, font = 2)
plot(y, x, xaxt = "n", main = "Manhattan Plot SNP p_value - Trait: 1 Effect: 3", xlab = "", ylab = "-log10(p-value)", pch = 20, xlim = c(1, n), ylim = c(0, max(x)), col = chr)

# Add Bonferroni line
abline(h = bonf, col = "red", lwd = 2, lty = 2)

# Add legend for Bonferroni line
legend("topright", legend = "Bonferroni correction for genome independent segments", col = "red", lwd = 2, lty = 2, cex = 1)

axis(1, at = pos, labels = chrn)
dev.off()

My Image

We will use the file /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/chrsnp_pval to get the p-values for the SNPs in the windows (that explain more than 0.5% of additive genetic variance)

11.4 BLUPF90+ WINDOWS

To perform Window approach on Blupf90+ I copied all files in: /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno to: /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10 I did this because I’m not sure about which files the postGSf90 needs to run, so I duplicate the whole directory, and I will change only the file renf90_ssGWAS2_SNPeff.par name for renf90_ssGWAS2_SNPeff_W_10.par and change the options to perform the window approach.

11.4.1 Running renf90_ssGWAS2_SNPeff_W_10.par for GWAS (WINDOWS)

The parameter card bellow we are going to run using the software postGSf90:

okok

# BLUPF90 parameter file created by RENUMF90
DATAFILE
 renf90.dat
NUMBER_OF_TRAITS
           1
NUMBER_OF_EFFECTS
           3
OBSERVATION(S)
    1
WEIGHT(S)
 
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
 2         4 cross 
 3        23 cross 
 4      3724 cross 
RANDOM_RESIDUAL VALUES
   77182.    
 RANDOM_GROUP
     3
 RANDOM_TYPE
 add_an_upginb
 FILE
renadd03.ped                                                                    
(CO)VARIANCES
   28212.    
OPTION SNP_file bruno_gntps_iid_71_clean
OPTION origID
OPTION no_quality_control
OPTION readGInverse
OPTION readA22Inverse
OPTION map_file snpmap.txt_clean
OPTION snp_p_value
OPTION Manhattan_plot_R
OPTION Manhattan_plot
OPTION SNP_moving_average 10
OPTION windows_variance 10 
The parameter file above will generate the following files:
  • snp_sol
    • column 1: trait
    • column 2: effect
    • column 3: SNP
    • column 4: Chromosome
    • column 5: Position
    • column 6: SNP solution
    • column 7: weight
    • column 8: variance explained by n adjacents SNP (if OPTION windows_variance is used).
    • column 9: variance of the SNP solution (used to compute the p-value) (if OPTION snp_p_value is used)
  • snp_pred
    • contains allele frequencies + SNP effects
  • chrsnpvar
    • column 1: trait
    • column 2: effect
    • column 3: variance explained by n adjacents SNP
      • If windows size is 20, the proportion of variance assigned to SNP 1 is calculated from SNP 1 to 20, for SNP 2 it goes from 2 to 21
    • column 4: SNP
    • column 5: Chromosome
    • column 6: Position
  • chrsnp_pval
    • column 1: trait
    • column 2: effect
    • column 3: -log10(p-value)
    • column 4: SNP
    • column 5: Chromosome
    • column 6: Position in bp
  • chrsnp
    • column 1: trait
    • column 2: effect
    • column 3: values of SNP effects to use in Manhattan plots → [abs(SNP_i)/SD(SNP)]
    • column 4: SNP
    • column 5: Chromosome
    • column 6: Position
  • windows_segments
    • column 1: label
    • column 2: window size (number of SNP)
    • column 3: Start SNP number for the window
    • column 4: End SNP number for the window
    • column 5: identification of window: (ChrNumber)’_’(startPositionMBP)
    • column 6: Start (ChrNumber)’_’(Position) for the window
    • column 7: End (ChrNumber)’_’(Position) for the window
  • windows_variance
    • column 1: trait
    • column 2: effect
    • column 3: Start SNP number or SNP name for the window
    • column 4: End SNP number or SNP name for the window
    • column 5: window size (number of SNP)
    • column 6: Start (ChrNumber)’_’(Position) for the window
    • column 7: End (ChrNumber)’_’(Position) for the window
    • column 8: identification of window: (ChrNumber)’_’(startPositionMBP)
    • column 9: variance explained by n adjacents SNP
  • Vft1e3.R
  • Sft1e3.R
  • Pft1e3.R

Bellow we can see the SNPs that explain more than 0.5% of Genetic Variance okok

w_var <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10/chrsnpvar", header = F)
w_var <- filter(w_var, V3 > 0.5)
colnames(w_var) <- c("V1", "V2", "Var", "SNP", "CHR", "BP")
snp_map <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10/snpmap.txt_clean", header = T)

# Fazer o merge baseado em duas condições: CHR e POS
merged_data <- merge(w_var, snp_map, by.x = c("CHR", "BP"), by.y = c("CHR", "POS"), all.x = TRUE)
w_var <- merged_data[,c("CHR", "BP", "Var", "SNP_ID")]

write.csv(w_var, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10/snpID_w05.csv")

rsid <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10/SNPchimp_result_1425506748.tsv", header = T)

merged <- merge(rsid, w_var, by.x ="SNP_name", by.y ="SNP_ID")

colnames(merged)

merged <- merged[,c("SNP_name", "rs", "CHR", "BP", "Var")]

colnames(merged) <- c("SNP_name", "rsID", "CHR", "BP", "Var")

write.csv(merged, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10/snpID_w05_rsid_snpvar.csv")
X SNP_name rsID CHR BP Var
1 ARS-BFGL-BAC-28665 rs111010562 24 28487771 0.5601843
2 ARS-BFGL-BAC-35548 rs110100182 2 115665427 1.2237598
3 ARS-BFGL-BAC-7444 rs110491621 13 18558540 0.6040267
4 ARS-BFGL-NGS-103753 rs110842922 2 115821065 1.0965189
5 ARS-BFGL-NGS-107330 rs109766798 2 116018639 0.5887867
6 ARS-BFGL-NGS-25298 rs109868537 3 111772736 1.0283717
7 ARS-BFGL-NGS-30337 rs110485060 2 115730530 1.2853203
8 ARS-BFGL-NGS-34854 rs109738387 23 14185501 0.5312912
9 ARS-BFGL-NGS-37809 rs42751504 3 111751663 0.9351573
10 ARS-BFGL-NGS-43721 rs108974471 2 115986085 0.7857140
11 ARS-BFGL-NGS-44131 rs110100483 3 111806406 0.8576835
12 ARS-BFGL-NGS-57285 rs109872657 9 10112014 0.5060728
13 ARS-BFGL-NGS-58358 rs109507088 27 16692516 0.5575927
14 ARS-BFGL-NGS-6202 rs110385521 3 111833768 0.9820334
15 ARS-BFGL-NGS-97489 rs42111498 27 16546003 0.5266455
16 ARS-BFGL-NGS-97849 rs110553601 3 111965305 0.6104093
17 BTA-25900-no-rs rs41575397 13 18509812 0.6018006
18 BTA-49096-no-rs rs41578131 2 115695003 1.5221061
19 BTA-95363-no-rs rs41615088 16 6121692 0.5967890
20 BTB-00952622 rs42110734 27 16663290 0.5891105
21 BTB-01068898 rs42226640 6 8630173 0.5563913
22 BTB-01069210 rs42225053 6 9142299 0.5157691
23 BTB-01485274 rs42609685 24 28540641 0.7103497
24 BTB-01608944 rs42723390 15 10974997 0.5126189
25 BTB-01641394 rs42752353 3 111730561 0.8541772
26 BTB-01948148 rs43056622 3 111677167 0.6871511
27 Hapmap41888-BTA-49091 rs41645223 2 115622067 0.8040708
28 Hapmap42062-BTA-109789 rs41621207 3 111708236 1.0996405
29 Hapmap49833-BTA-103929 rs41603335 13 18386942 0.5085677
30 Hapmap50266-BTA-13664 rs29018622 13 18266073 0.5842979
31 Hapmap54541-rs29014049 rs29014049 8 55218811 0.6997957
32 Hapmap54770-rs29009608 rs29009608 2 115875702 0.7073805
33 Hapmap54981-rs29019846 rs29019846 24 28516684 0.6150000
34 Hapmap58887-rs29013502 rs29013502 24 28570245 0.5794518

11.4.1.1 Manhattan Plots for BLUPF90 Windows ssGWAS

okok

# Set working directory
setwd("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10/")
getwd()

# Read and process data
yyy1 = read.table("chrsnpvar")
yyy  = yyy1[order(yyy1$V4),]
zzz  = yyy[ which(yyy$V1==1 & yyy$V2==3), ]
n    = nrow(zzz)
y    = zzz[,4]
x    = zzz[,3]
chr1 = zzz[,5]
chr  = NULL
pos  = NULL
for (i in unique(yyy$V5)) {
  zz     = yyy[yyy$V5==i,]
  key    = zz$V4 
  medio  = round(nrow(zz)/2,0)
  z      = key[medio]
  pos    = c(pos,z)
}

# Assign colors for chromosomes
chrn       = unique(yyy$V5)
one        = which(chr1%%4==0) 
two        = which(chr1%%4==1) 
three      = which(chr1%%4==2) 
four       = which(chr1%%4==3) 
chr[one]   = "darkgoldenrod"
chr[two]   = "darkorchid"
chr[three] = "blue"
chr[four]  = "forestgreen"

# Generate Manhattan plot and save to PNG
png(file = "Vft1e3_manplot_with_thresholds.png", 
    width = 20000, height = 10000, res = 600) # Configure width, height, and resolution

# Set plot parameters
par(mfrow = c(1, 1), family = "sans", cex = 1.5, font = 2, mar = c(5, 5, 4, 2))

# Create Manhattan plot
plot(y, x, xaxt = "n", main = "Manhattan Plot SNP Variance explained by 10 adjacents SNP window", 
     xlab = "", ylab = "% variance expl", pch = 20, 
     xlim = c(1, n), ylim = c(0, max(x)), col = chr, cex.axis = 1.2)

# Add dashed lines for thresholds
abline(h = 0.5, col = "red", lwd = 2, lty = 2)  # Red dashed line at 0.5

# Add legend for thresholds
legend("topright", legend = c("Threshold 0.5%"), 
       col = "red", lwd = 2, lty = 2, cex = 1)

# Add chromosome labels on the X-axis
axis(1, at = pos, labels = chrn, las = 1, cex.axis = 0.8)

# Close the graphics device
dev.off()

My Image

stop stop ### Finding p-values for each SNP in the window

To bring the p-value for each snp in the window we need to use files from different approachs:
  • chrsnpvar:

    /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10/chrsnpvar

    • which contains the variance explained by each window. This file is from windows approach
  • chrsnp_pval:

    /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/chrsnp_pval

    • which contains the p-value for each snp. This file is not from windows approach. Is for the approach which considers each SNP individually (beside fit all SNPs at the same time)

okok

chrsnpvar <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10/chrsnpvar", header = F)
chrsnpvar <- filter(chrsnpvar, V3 > 0.5)
colnames(chrsnpvar) <- c("V1", "V2", "Var", "SNP", "CHR", "BP")

# as the column 3: -log10(p-value) of this file doesn't say that this p-value is for the window, I'm assuming that this p-value is for individual SNP
chrsnp_pval <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/chrsnp_pval", header = F)
colnames(chrsnp_pval) <- c("trait", "effect", "-log10(p-value)", "SNP", "Chromosome", "Position in bp")

snp_map <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10/snpmap.txt_clean", header = T)

# Bringing p-value for each snp in the window

# Create an empty row with the same column names
empty_row <- chrsnpvar[1, ]
empty_row[,] <- NA  # Set all values to NA

# Create a new dataframe with gaps
expanded_df <- do.call(rbind, lapply(1:nrow(chrsnpvar), function(i) {
  rbind(chrsnpvar[i, ], empty_row[rep(1, 9), ])
}))

# Reset row names
rownames(expanded_df) <- NULL

# Ensure the SNP column is numeric
expanded_df$SNP <- as.numeric(expanded_df$SNP)

# Loop through the dataframe and fill empty SNP values with sequential numbers
for (i in seq_len(nrow(expanded_df))) {
  if (is.na(expanded_df$SNP[i])) {
    expanded_df$SNP[i] <- expanded_df$SNP[i - 1] + 1
  }
}


# Assuming your data frame is called df
expanded_df$index <- 1:nrow(expanded_df)
merged_pvalue_var <- merge(chrsnp_pval, expanded_df, by = "SNP")
merged_pvalue_var <- arrange(merged_pvalue_var, index)

merged_pvalue_var$p_value <- 10^(-merged_pvalue_var$`-log10(p-value)`)

merged_pvalue_var <- merge(merged_pvalue_var, snp_map, by.x = c("Chromosome", "Position in bp"), by.y = c("CHR", "POS"), all.x = TRUE)

merged_pvalue_var <- arrange(merged_pvalue_var, index)

pvalue_var_allsnps <- merged_pvalue_var[, c("index", "Chromosome", "Position in bp",
                                            "SNP", "SNP_ID", "-log10(p-value)",
                                            "p_value", "Var")]

write.csv(pvalue_var_allsnps, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10/pval_for_each_snp_in_window.csv")

The p-value for each SNP in the window is bellow:

## Warning: package 'readxl' was built under R version 4.3.3
## Warning: package 'knitr' was built under R version 4.3.3
Chr Position in bp SNP SNP_ID -LOG10(p-value) p_value Var
2 115622067 4582 Hapmap41888-BTA-49091 0.4075785 0.3912204 0.80407084600000001
2 115665427 4583 ARS-BFGL-BAC-35548 2.1953425 0.0063776 NA
2 115695003 4584 BTA-49096-no-rs 0.0488671 0.8935789 NA
2 115730530 4585 ARS-BFGL-NGS-30337 2.5552443 0.0027846 NA
2 115821065 4586 ARS-BFGL-NGS-103753 1.8440522 0.0143202 NA
2 115875702 4587 Hapmap54770-rs29009608 1.8678731 0.0135559 NA
2 115986085 4588 ARS-BFGL-NGS-43721 2.7746365 0.0016802 NA
2 116018639 4589 ARS-BFGL-NGS-107330 2.3463508 0.0045045 NA
2 116064090 4590 Hapmap38323-BTA-49132 2.3463508 0.0045045 NA
2 116123392 4591 ARS-BFGL-NGS-107909 1.1792513 0.0661833 NA
2 115665427 4583 ARS-BFGL-BAC-35548 2.1953425 0.0063776 1.2237597850999999
2 115695003 4584 BTA-49096-no-rs 0.0488671 0.8935789 NA
2 115730530 4585 ARS-BFGL-NGS-30337 2.5552443 0.0027846 NA
2 115821065 4586 ARS-BFGL-NGS-103753 1.8440522 0.0143202 NA
2 115875702 4587 Hapmap54770-rs29009608 1.8678731 0.0135559 NA
2 115986085 4588 ARS-BFGL-NGS-43721 2.7746365 0.0016802 NA
2 116018639 4589 ARS-BFGL-NGS-107330 2.3463508 0.0045045 NA
2 116064090 4590 Hapmap38323-BTA-49132 2.3463508 0.0045045 NA
2 116123392 4591 ARS-BFGL-NGS-107909 1.1792513 0.0661833 NA
2 116152065 4592 ARS-BFGL-NGS-19012 1.1792513 0.0661833 NA
2 115695003 4584 BTA-49096-no-rs 0.0488671 0.8935789 1.5221060874000001
2 115730530 4585 ARS-BFGL-NGS-30337 2.5552443 0.0027846 NA
2 115821065 4586 ARS-BFGL-NGS-103753 1.8440522 0.0143202 NA
2 115875702 4587 Hapmap54770-rs29009608 1.8678731 0.0135559 NA
2 115986085 4588 ARS-BFGL-NGS-43721 2.7746365 0.0016802 NA
2 116018639 4589 ARS-BFGL-NGS-107330 2.3463508 0.0045045 NA
2 116064090 4590 Hapmap38323-BTA-49132 2.3463508 0.0045045 NA
2 116123392 4591 ARS-BFGL-NGS-107909 1.1792513 0.0661833 NA
2 116152065 4592 ARS-BFGL-NGS-19012 1.1792513 0.0661833 NA
2 116186918 4593 BTA-103078-no-rs 1.1913189 0.0643696 NA
2 115730530 4585 ARS-BFGL-NGS-30337 2.5552443 0.0027846 1.2853203491
2 115821065 4586 ARS-BFGL-NGS-103753 1.8440522 0.0143202 NA
2 115875702 4587 Hapmap54770-rs29009608 1.8678731 0.0135559 NA
2 115986085 4588 ARS-BFGL-NGS-43721 2.7746365 0.0016802 NA
2 116018639 4589 ARS-BFGL-NGS-107330 2.3463508 0.0045045 NA
2 116064090 4590 Hapmap38323-BTA-49132 2.3463508 0.0045045 NA
2 116123392 4591 ARS-BFGL-NGS-107909 1.1792513 0.0661833 NA
2 116152065 4592 ARS-BFGL-NGS-19012 1.1792513 0.0661833 NA
2 116186918 4593 BTA-103078-no-rs 1.1913189 0.0643696 NA
2 116211153 4594 ARS-BFGL-NGS-112195 0.1517643 0.7050757 NA
2 115821065 4586 ARS-BFGL-NGS-103753 1.8440522 0.0143202 1.0965188597
2 115875702 4587 Hapmap54770-rs29009608 1.8678731 0.0135559 NA
2 115986085 4588 ARS-BFGL-NGS-43721 2.7746365 0.0016802 NA
2 116018639 4589 ARS-BFGL-NGS-107330 2.3463508 0.0045045 NA
2 116064090 4590 Hapmap38323-BTA-49132 2.3463508 0.0045045 NA
2 116123392 4591 ARS-BFGL-NGS-107909 1.1792513 0.0661833 NA
2 116152065 4592 ARS-BFGL-NGS-19012 1.1792513 0.0661833 NA
2 116186918 4593 BTA-103078-no-rs 1.1913189 0.0643696 NA
2 116211153 4594 ARS-BFGL-NGS-112195 0.1517643 0.7050757 NA
2 116340519 4595 Hapmap44637-BTA-17098 0.1117605 0.7731068 NA
2 115875702 4587 Hapmap54770-rs29009608 1.8678731 0.0135559 0.70738053840000004
2 115986085 4588 ARS-BFGL-NGS-43721 2.7746365 0.0016802 NA
2 116018639 4589 ARS-BFGL-NGS-107330 2.3463508 0.0045045 NA
2 116064090 4590 Hapmap38323-BTA-49132 2.3463508 0.0045045 NA
2 116123392 4591 ARS-BFGL-NGS-107909 1.1792513 0.0661833 NA
2 116152065 4592 ARS-BFGL-NGS-19012 1.1792513 0.0661833 NA
2 116186918 4593 BTA-103078-no-rs 1.1913189 0.0643696 NA
2 116211153 4594 ARS-BFGL-NGS-112195 0.1517643 0.7050757 NA
2 116340519 4595 Hapmap44637-BTA-17098 0.1117605 0.7731068 NA
2 116395928 4596 ARS-BFGL-NGS-33237 0.4679789 0.3404248 NA
2 115986085 4588 ARS-BFGL-NGS-43721 2.7746365 0.0016802 0.78571400749999998
2 116018639 4589 ARS-BFGL-NGS-107330 2.3463508 0.0045045 NA
2 116064090 4590 Hapmap38323-BTA-49132 2.3463508 0.0045045 NA
2 116123392 4591 ARS-BFGL-NGS-107909 1.1792513 0.0661833 NA
2 116152065 4592 ARS-BFGL-NGS-19012 1.1792513 0.0661833 NA
2 116186918 4593 BTA-103078-no-rs 1.1913189 0.0643696 NA
2 116211153 4594 ARS-BFGL-NGS-112195 0.1517643 0.7050757 NA
2 116340519 4595 Hapmap44637-BTA-17098 0.1117605 0.7731068 NA
2 116395928 4596 ARS-BFGL-NGS-33237 0.4679789 0.3404248 NA
2 116423023 4597 BTA-95472-no-rs 0.9326654 0.1167709 NA
2 116018639 4589 ARS-BFGL-NGS-107330 2.3463508 0.0045045 0.5887867363
2 116064090 4590 Hapmap38323-BTA-49132 2.3463508 0.0045045 NA
2 116123392 4591 ARS-BFGL-NGS-107909 1.1792513 0.0661833 NA
2 116152065 4592 ARS-BFGL-NGS-19012 1.1792513 0.0661833 NA
2 116186918 4593 BTA-103078-no-rs 1.1913189 0.0643696 NA
2 116211153 4594 ARS-BFGL-NGS-112195 0.1517643 0.7050757 NA
2 116340519 4595 Hapmap44637-BTA-17098 0.1117605 0.7731068 NA
2 116395928 4596 ARS-BFGL-NGS-33237 0.4679789 0.3404248 NA
2 116423023 4597 BTA-95472-no-rs 0.9326654 0.1167709 NA
2 116430317 4598 Hapmap54608-rs29020417 1.0790898 0.0833509 NA
3 111677167 6878 BTB-01948148 1.0946657 0.0804145 0.68715106829999995
3 111708236 6879 Hapmap42062-BTA-109789 1.2400438 0.0575382 NA
3 111730561 6880 BTB-01641394 4.1432148 0.0000719 NA
3 111751663 6881 ARS-BFGL-NGS-37809 4.1432148 0.0000719 NA
3 111772736 6882 ARS-BFGL-NGS-25298 5.3052523 0.0000050 NA
3 111806406 6883 ARS-BFGL-NGS-44131 0.1289040 0.7431834 NA
3 111833768 6884 ARS-BFGL-NGS-6202 3.2035027 0.0006259 NA
3 111933069 6885 ARS-BFGL-NGS-85333 4.3712593 0.0000425 NA
3 111965305 6886 ARS-BFGL-NGS-97849 1.9106701 0.0122837 NA
3 111991216 6887 ARS-BFGL-NGS-71345 1.9106701 0.0122837 NA
3 111708236 6879 Hapmap42062-BTA-109789 1.2400438 0.0575382 1.0996405039999999
3 111730561 6880 BTB-01641394 4.1432148 0.0000719 NA
3 111751663 6881 ARS-BFGL-NGS-37809 4.1432148 0.0000719 NA
3 111772736 6882 ARS-BFGL-NGS-25298 5.3052523 0.0000050 NA
3 111806406 6883 ARS-BFGL-NGS-44131 0.1289040 0.7431834 NA
3 111833768 6884 ARS-BFGL-NGS-6202 3.2035027 0.0006259 NA
3 111933069 6885 ARS-BFGL-NGS-85333 4.3712593 0.0000425 NA
3 111965305 6886 ARS-BFGL-NGS-97849 1.9106701 0.0122837 NA
3 111991216 6887 ARS-BFGL-NGS-71345 1.9106701 0.0122837 NA
3 112064027 6888 ARS-BFGL-NGS-110683 2.3824353 0.0041454 NA
3 111730561 6880 BTB-01641394 4.1432148 0.0000719 0.85417720109999995
3 111751663 6881 ARS-BFGL-NGS-37809 4.1432148 0.0000719 NA
3 111772736 6882 ARS-BFGL-NGS-25298 5.3052523 0.0000050 NA
3 111806406 6883 ARS-BFGL-NGS-44131 0.1289040 0.7431834 NA
3 111833768 6884 ARS-BFGL-NGS-6202 3.2035027 0.0006259 NA
3 111933069 6885 ARS-BFGL-NGS-85333 4.3712593 0.0000425 NA
3 111965305 6886 ARS-BFGL-NGS-97849 1.9106701 0.0122837 NA
3 111991216 6887 ARS-BFGL-NGS-71345 1.9106701 0.0122837 NA
3 112064027 6888 ARS-BFGL-NGS-110683 2.3824353 0.0041454 NA
3 112192839 6889 BTA-87451-no-rs 1.9682118 0.0107594 NA
3 111751663 6881 ARS-BFGL-NGS-37809 4.1432148 0.0000719 0.93515725350000001
3 111772736 6882 ARS-BFGL-NGS-25298 5.3052523 0.0000050 NA
3 111806406 6883 ARS-BFGL-NGS-44131 0.1289040 0.7431834 NA
3 111833768 6884 ARS-BFGL-NGS-6202 3.2035027 0.0006259 NA
3 111933069 6885 ARS-BFGL-NGS-85333 4.3712593 0.0000425 NA
3 111965305 6886 ARS-BFGL-NGS-97849 1.9106701 0.0122837 NA
3 111991216 6887 ARS-BFGL-NGS-71345 1.9106701 0.0122837 NA
3 112064027 6888 ARS-BFGL-NGS-110683 2.3824353 0.0041454 NA
3 112192839 6889 BTA-87451-no-rs 1.9682118 0.0107594 NA
3 112221070 6890 ARS-BFGL-NGS-18163 1.6332310 0.0232685 NA
3 111772736 6882 ARS-BFGL-NGS-25298 5.3052523 0.0000050 1.0283717422
3 111806406 6883 ARS-BFGL-NGS-44131 0.1289040 0.7431834 NA
3 111833768 6884 ARS-BFGL-NGS-6202 3.2035027 0.0006259 NA
3 111933069 6885 ARS-BFGL-NGS-85333 4.3712593 0.0000425 NA
3 111965305 6886 ARS-BFGL-NGS-97849 1.9106701 0.0122837 NA
3 111991216 6887 ARS-BFGL-NGS-71345 1.9106701 0.0122837 NA
3 112064027 6888 ARS-BFGL-NGS-110683 2.3824353 0.0041454 NA
3 112192839 6889 BTA-87451-no-rs 1.9682118 0.0107594 NA
3 112221070 6890 ARS-BFGL-NGS-18163 1.6332310 0.0232685 NA
3 112257692 6891 ARS-BFGL-NGS-115246 0.3386287 0.4585338 NA
3 111806406 6883 ARS-BFGL-NGS-44131 0.1289040 0.7431834 0.85768353340000003
3 111833768 6884 ARS-BFGL-NGS-6202 3.2035027 0.0006259 NA
3 111933069 6885 ARS-BFGL-NGS-85333 4.3712593 0.0000425 NA
3 111965305 6886 ARS-BFGL-NGS-97849 1.9106701 0.0122837 NA
3 111991216 6887 ARS-BFGL-NGS-71345 1.9106701 0.0122837 NA
3 112064027 6888 ARS-BFGL-NGS-110683 2.3824353 0.0041454 NA
3 112192839 6889 BTA-87451-no-rs 1.9682118 0.0107594 NA
3 112221070 6890 ARS-BFGL-NGS-18163 1.6332310 0.0232685 NA
3 112257692 6891 ARS-BFGL-NGS-115246 0.3386287 0.4585338 NA
3 112313178 6892 ARS-BFGL-NGS-19581 0.8847032 0.1304058 NA
3 111833768 6884 ARS-BFGL-NGS-6202 3.2035027 0.0006259 0.98203344020000005
3 111933069 6885 ARS-BFGL-NGS-85333 4.3712593 0.0000425 NA
3 111965305 6886 ARS-BFGL-NGS-97849 1.9106701 0.0122837 NA
3 111991216 6887 ARS-BFGL-NGS-71345 1.9106701 0.0122837 NA
3 112064027 6888 ARS-BFGL-NGS-110683 2.3824353 0.0041454 NA
3 112192839 6889 BTA-87451-no-rs 1.9682118 0.0107594 NA
3 112221070 6890 ARS-BFGL-NGS-18163 1.6332310 0.0232685 NA
3 112257692 6891 ARS-BFGL-NGS-115246 0.3386287 0.4585338 NA
3 112313178 6892 ARS-BFGL-NGS-19581 0.8847032 0.1304058 NA
3 112345942 6893 ARS-BFGL-NGS-55894 0.2919687 0.5105418 NA
3 111965305 6886 ARS-BFGL-NGS-97849 1.9106701 0.0122837 0.61040931220000005
3 111991216 6887 ARS-BFGL-NGS-71345 1.9106701 0.0122837 NA
3 112064027 6888 ARS-BFGL-NGS-110683 2.3824353 0.0041454 NA
3 112192839 6889 BTA-87451-no-rs 1.9682118 0.0107594 NA
3 112221070 6890 ARS-BFGL-NGS-18163 1.6332310 0.0232685 NA
3 112257692 6891 ARS-BFGL-NGS-115246 0.3386287 0.4585338 NA
3 112313178 6892 ARS-BFGL-NGS-19581 0.8847032 0.1304058 NA
3 112345942 6893 ARS-BFGL-NGS-55894 0.2919687 0.5105418 NA
3 112427851 6894 ARS-BFGL-NGS-65128 0.1144700 0.7682985 NA
3 112662102 6895 BTA-117752-no-rs 1.4158696 0.0383822 NA
6 8630173 11046 BTB-01068898 3.4115993 0.0003876 0.55639127330000004
6 8727520 11047 Hapmap33379-BTA-153882 0.2360505 0.5806969 NA
6 8794456 11048 Hapmap44182-BTA-104972 0.2803710 0.5243594 NA
6 8881200 11049 ARS-BFGL-NGS-54017 2.2115780 0.0061436 NA
6 9049173 11050 BTB-01645485 0.0063533 0.9854774 NA
6 9142299 11051 BTB-01069210 0.1114894 0.7735895 NA
6 9166519 11052 BTB-01069292 1.9594612 0.0109784 NA
6 9189316 11053 BTB-02016906 1.9594612 0.0109784 NA
6 9216946 11054 BTB-01718894 2.5236403 0.0029947 NA
6 9272547 11055 BTB-01718868 1.5012275 0.0315335 NA
6 9142299 11051 BTB-01069210 0.1114894 0.7735895 0.51576907869999999
6 9166519 11052 BTB-01069292 1.9594612 0.0109784 NA
6 9189316 11053 BTB-02016906 1.9594612 0.0109784 NA
6 9216946 11054 BTB-01718894 2.5236403 0.0029947 NA
6 9272547 11055 BTB-01718868 1.5012275 0.0315335 NA
6 9387456 11056 BTB-01939106 0.0631806 0.8646083 NA
6 9418094 11057 BTB-01322180 0.2428379 0.5716920 NA
6 9453914 11058 BTB-01322092 0.5623756 0.2739204 NA
6 9483232 11059 BTA-10940-no-rs 0.7251282 0.1883093 NA
6 9504486 11060 BTB-01322000 0.7154267 0.1925632 NA
8 55218811 15704 Hapmap54541-rs29014049 1.2509697 0.0561087 0.69979568290000005
8 55298755 15705 BTA-107975-no-rs 0.2434081 0.5709418 NA
8 55323946 15706 BTB-01616745 0.5026370 0.3143135 NA
8 55376512 15707 ARS-BFGL-NGS-76732 3.7920659 0.0001614 NA
8 55433879 15708 BTB-02017947 2.7518384 0.0017708 NA
8 55458002 15709 Hapmap51811-BTA-119735 1.8587790 0.0138427 NA
8 55493961 15710 BTB-01258879 3.0361970 0.0009200 NA
8 55517877 15711 ARS-BFGL-NGS-37680 1.5463493 0.0284217 NA
8 55552514 15712 ARS-BFGL-NGS-74002 2.6498886 0.0022393 NA
8 55603363 15713 ARS-BFGL-NGS-108258 3.0361970 0.0009200 NA
9 10112014 16913 ARS-BFGL-NGS-57285 1.7786476 0.0166476 0.50607275890000003
9 10139748 16914 ARS-BFGL-NGS-37889 1.6229291 0.0238271 NA
9 10221793 16915 ARS-BFGL-NGS-93995 0.7484987 0.1784437 NA
9 10260399 16916 ARS-BFGL-NGS-15511 0.7484987 0.1784437 NA
9 10337737 16917 BTA-83229-no-rs 0.2705532 0.5363481 NA
9 10490301 16918 ARS-BFGL-NGS-110691 0.1495558 0.7086703 NA
9 10529588 16919 Hapmap36170-SCAFFOLD200128_32745 0.2984731 0.5029524 NA
9 10601391 16920 ARS-BFGL-NGS-10202 0.0295204 0.9342856 NA
9 10622057 16921 Hapmap38633-BTA-83140 0.0528145 0.8854937 NA
9 10724264 16922 ARS-BFGL-NGS-17513 0.2137440 0.6113022 NA
13 18266073 23616 Hapmap50266-BTA-13664 0.3810341 0.4158780 0.58429790559999994
13 18386942 23617 Hapmap49833-BTA-103929 0.4177720 0.3821448 NA
13 18489670 23618 ARS-BFGL-NGS-21967 0.3601825 0.4363324 NA
13 18509812 23619 BTA-25900-no-rs 1.3093175 0.0490549 NA
13 18558540 23620 ARS-BFGL-BAC-7444 1.2573005 0.0552967 NA
13 18626070 23621 ARS-BFGL-NGS-116274 1.2573005 0.0552967 NA
13 18676544 23622 BTA-31832-no-rs 1.1165798 0.0764575 NA
13 18720587 23623 Hapmap43233-BTA-31838 0.7767939 0.1671884 NA
13 18755905 23624 BTA-10553-rs29016106 0.8913772 0.1284171 NA
13 18792716 23625 ARS-BFGL-NGS-72609 1.2362833 0.0580386 NA
13 18386942 23617 Hapmap49833-BTA-103929 0.4177720 0.3821448 0.50856768149999998
13 18489670 23618 ARS-BFGL-NGS-21967 0.3601825 0.4363324 NA
13 18509812 23619 BTA-25900-no-rs 1.3093175 0.0490549 NA
13 18558540 23620 ARS-BFGL-BAC-7444 1.2573005 0.0552967 NA
13 18626070 23621 ARS-BFGL-NGS-116274 1.2573005 0.0552967 NA
13 18676544 23622 BTA-31832-no-rs 1.1165798 0.0764575 NA
13 18720587 23623 Hapmap43233-BTA-31838 0.7767939 0.1671884 NA
13 18755905 23624 BTA-10553-rs29016106 0.8913772 0.1284171 NA
13 18792716 23625 ARS-BFGL-NGS-72609 1.2362833 0.0580386 NA
13 18822522 23626 ARS-BFGL-NGS-13518 1.3218013 0.0476649 NA
13 18509812 23619 BTA-25900-no-rs 1.3093175 0.0490549 0.60180061750000002
13 18558540 23620 ARS-BFGL-BAC-7444 1.2573005 0.0552967 NA
13 18626070 23621 ARS-BFGL-NGS-116274 1.2573005 0.0552967 NA
13 18676544 23622 BTA-31832-no-rs 1.1165798 0.0764575 NA
13 18720587 23623 Hapmap43233-BTA-31838 0.7767939 0.1671884 NA
13 18755905 23624 BTA-10553-rs29016106 0.8913772 0.1284171 NA
13 18792716 23625 ARS-BFGL-NGS-72609 1.2362833 0.0580386 NA
13 18822522 23626 ARS-BFGL-NGS-13518 1.3218013 0.0476649 NA
13 18870958 23627 ARS-BFGL-NGS-68568 0.3402232 0.4568534 NA
13 18892498 23628 ARS-BFGL-NGS-109707 1.8542890 0.0139866 NA
13 18558540 23620 ARS-BFGL-BAC-7444 1.2573005 0.0552967 0.60402666900000002
13 18626070 23621 ARS-BFGL-NGS-116274 1.2573005 0.0552967 NA
13 18676544 23622 BTA-31832-no-rs 1.1165798 0.0764575 NA
13 18720587 23623 Hapmap43233-BTA-31838 0.7767939 0.1671884 NA
13 18755905 23624 BTA-10553-rs29016106 0.8913772 0.1284171 NA
13 18792716 23625 ARS-BFGL-NGS-72609 1.2362833 0.0580386 NA
13 18822522 23626 ARS-BFGL-NGS-13518 1.3218013 0.0476649 NA
13 18870958 23627 ARS-BFGL-NGS-68568 0.3402232 0.4568534 NA
13 18892498 23628 ARS-BFGL-NGS-109707 1.8542890 0.0139866 NA
13 18952584 23629 ARS-BFGL-NGS-116613 1.2880171 0.0515208 NA
15 10974997 26385 BTB-01608944 1.3520801 0.0444549 0.51261892730000003
15 11100934 26386 BTA-91816-no-rs 0.4733557 0.3362360 NA
15 11144666 26387 BTB-01421844 2.0829468 0.0082614 NA
15 11185546 26388 BTB-01421892 0.4733557 0.3362360 NA
15 11207429 26389 BTB-01421934 1.0078060 0.0982187 NA
15 11236303 26390 BTB-01422008 1.0078060 0.0982187 NA
15 11310260 26391 BTA-91820-no-rs 1.0078060 0.0982187 NA
15 11451408 26392 Hapmap40037-BTA-100798 0.9018532 0.1253565 NA
15 11487557 26393 BTA-94770-no-rs 0.0029926 0.9931329 NA
15 11550646 26394 ARS-BFGL-NGS-27782 0.8776148 0.1325517 NA
16 6121692 27745 BTA-95363-no-rs 1.4433689 0.0360272 0.59678898970000005
16 6308221 27746 BTB-01975868 3.5022880 0.0003146 NA
16 6329170 27747 ARS-BFGL-NGS-89740 2.8227273 0.0015041 NA
16 6421207 27748 BTB-01200020 1.6187843 0.0240556 NA
16 6446024 27749 BTB-01200008 1.4569926 0.0349146 NA
16 6529002 27750 Hapmap52413-rs29016330 0.7722977 0.1689283 NA
16 6549899 27751 ARS-BFGL-NGS-13802 1.4433689 0.0360272 NA
16 6614897 27752 BTA-109095-no-rs 2.1726638 0.0067195 NA
16 6656521 27753 Hapmap51387-BTA-109077 2.1726638 0.0067195 NA
16 6781530 27754 Hapmap33722-BTA-155362 1.4427417 0.0360793 NA
23 14185501 36197 ARS-BFGL-NGS-34854 1.6567406 0.0220424 0.53129124309999998
23 14208471 36198 ARS-BFGL-NGS-100006 1.6567406 0.0220424 NA
23 14239531 36199 ARS-BFGL-NGS-29516 1.5126341 0.0307161 NA
23 14306746 36200 ARS-BFGL-NGS-78514 1.5126341 0.0307161 NA
23 14329178 36201 ARS-BFGL-NGS-113147 1.5217814 0.0300759 NA
23 14365537 36202 ARS-BFGL-NGS-105392 0.8997870 0.1259543 NA
23 14387143 36203 ARS-BFGL-NGS-59308 1.6397377 0.0229225 NA
23 14416017 36204 ARS-BFGL-NGS-8960 0.7575833 0.1747498 NA
23 14487014 36205 ARS-BFGL-NGS-115470 0.8312254 0.1474941 NA
23 14571804 36206 Hapmap30534-BTA-137081 2.5196142 0.0030226 NA
24 28487771 37343 ARS-BFGL-BAC-28665 0.5617269 0.2743298 0.56018426669999999
24 28516684 37344 Hapmap54981-rs29019846 0.8746587 0.1334570 NA
24 28540641 37345 BTB-01485274 1.7567714 0.0175077 NA
24 28570245 37346 Hapmap58887-rs29013502 1.2778933 0.0527359 NA
24 28604672 37347 BTB-01646599 1.7567714 0.0175077 NA
24 28638911 37348 ARS-BFGL-NGS-41140 1.5164166 0.0304497 NA
24 28676366 37349 ARS-BFGL-NGS-35716 1.0514307 0.0888320 NA
24 28703207 37350 BTB-00884791 1.0514307 0.0888320 NA
24 28731501 37351 ARS-BFGL-NGS-118806 1.8097285 0.0154979 NA
24 28795202 37352 BTB-00883964 1.4618598 0.0345255 NA
24 28516684 37344 Hapmap54981-rs29019846 0.8746587 0.1334570 0.61500002899999995
24 28540641 37345 BTB-01485274 1.7567714 0.0175077 NA
24 28570245 37346 Hapmap58887-rs29013502 1.2778933 0.0527359 NA
24 28604672 37347 BTB-01646599 1.7567714 0.0175077 NA
24 28638911 37348 ARS-BFGL-NGS-41140 1.5164166 0.0304497 NA
24 28676366 37349 ARS-BFGL-NGS-35716 1.0514307 0.0888320 NA
24 28703207 37350 BTB-00884791 1.0514307 0.0888320 NA
24 28731501 37351 ARS-BFGL-NGS-118806 1.8097285 0.0154979 NA
24 28795202 37352 BTB-00883964 1.4618598 0.0345255 NA
24 28850850 37353 Hapmap54558-rs29009598 0.3031365 0.4975807 NA
24 28540641 37345 BTB-01485274 1.7567714 0.0175077 0.71034974449999999
24 28570245 37346 Hapmap58887-rs29013502 1.2778933 0.0527359 NA
24 28604672 37347 BTB-01646599 1.7567714 0.0175077 NA
24 28638911 37348 ARS-BFGL-NGS-41140 1.5164166 0.0304497 NA
24 28676366 37349 ARS-BFGL-NGS-35716 1.0514307 0.0888320 NA
24 28703207 37350 BTB-00884791 1.0514307 0.0888320 NA
24 28731501 37351 ARS-BFGL-NGS-118806 1.8097285 0.0154979 NA
24 28795202 37352 BTB-00883964 1.4618598 0.0345255 NA
24 28850850 37353 Hapmap54558-rs29009598 0.3031365 0.4975807 NA
24 28894159 37354 BTA-57730-no-rs 0.8767144 0.1328268 NA
24 28570245 37346 Hapmap58887-rs29013502 1.2778933 0.0527359 0.57945177729999997
24 28604672 37347 BTB-01646599 1.7567714 0.0175077 NA
24 28638911 37348 ARS-BFGL-NGS-41140 1.5164166 0.0304497 NA
24 28676366 37349 ARS-BFGL-NGS-35716 1.0514307 0.0888320 NA
24 28703207 37350 BTB-00884791 1.0514307 0.0888320 NA
24 28731501 37351 ARS-BFGL-NGS-118806 1.8097285 0.0154979 NA
24 28795202 37352 BTB-00883964 1.4618598 0.0345255 NA
24 28850850 37353 Hapmap54558-rs29009598 0.3031365 0.4975807 NA
24 28894159 37354 BTA-57730-no-rs 0.8767144 0.1328268 NA
24 28963905 37355 BTA-93775-no-rs 0.0204343 0.9540380 NA
27 16546003 39915 ARS-BFGL-NGS-97489 2.1349421 0.0073292 0.52664546609999996
27 16628800 39916 Hapmap4685-BTA-29671 2.9562453 0.0011060 NA
27 16663290 39917 BTB-00952622 4.3841922 0.0000413 NA
27 16692516 39918 ARS-BFGL-NGS-58358 4.3841922 0.0000413 NA
27 16749902 39919 ARS-BFGL-NGS-34807 2.2355529 0.0058136 NA
27 16778234 39920 ARS-BFGL-NGS-13660 1.2061604 0.0622070 NA
27 16796562 39921 UA-IFASA-3305 0.1932988 0.6407686 NA
27 16818014 39922 ARS-BFGL-NGS-38547 1.2061604 0.0622070 NA
27 16946270 39923 ARS-BFGL-NGS-45781 2.1459210 0.0071463 NA
27 17051893 39924 ARS-BFGL-NGS-13285 1.2554968 0.0555269 NA
27 16663290 39917 BTB-00952622 4.3841922 0.0000413 0.58911050239999996
27 16692516 39918 ARS-BFGL-NGS-58358 4.3841922 0.0000413 NA
27 16749902 39919 ARS-BFGL-NGS-34807 2.2355529 0.0058136 NA
27 16778234 39920 ARS-BFGL-NGS-13660 1.2061604 0.0622070 NA
27 16796562 39921 UA-IFASA-3305 0.1932988 0.6407686 NA
27 16818014 39922 ARS-BFGL-NGS-38547 1.2061604 0.0622070 NA
27 16946270 39923 ARS-BFGL-NGS-45781 2.1459210 0.0071463 NA
27 17051893 39924 ARS-BFGL-NGS-13285 1.2554968 0.0555269 NA
27 17083748 39925 BTB-00958271 0.5040644 0.3132821 NA
27 17109301 39926 Hapmap25499-BTA-123322 0.2758655 0.5298275 NA
27 16692516 39918 ARS-BFGL-NGS-58358 4.3841922 0.0000413 0.55759268129999995
27 16749902 39919 ARS-BFGL-NGS-34807 2.2355529 0.0058136 NA
27 16778234 39920 ARS-BFGL-NGS-13660 1.2061604 0.0622070 NA
27 16796562 39921 UA-IFASA-3305 0.1932988 0.6407686 NA
27 16818014 39922 ARS-BFGL-NGS-38547 1.2061604 0.0622070 NA
27 16946270 39923 ARS-BFGL-NGS-45781 2.1459210 0.0071463 NA
27 17051893 39924 ARS-BFGL-NGS-13285 1.2554968 0.0555269 NA
27 17083748 39925 BTB-00958271 0.5040644 0.3132821 NA
27 17109301 39926 Hapmap25499-BTA-123322 0.2758655 0.5298275 NA
27 17149375 39927 ARS-BFGL-NGS-55767 0.1452961 0.7156553 NA

12 GALLO - 0.5% Windows - BLUPF90+

okok

gwas <- read.csv("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10/snpID_w05_rsid_snpvar.csv")
colnames(gwas)
colnames(gwas) <- c("X", "SNP", "rsID", "CHR", "BP", "Var")


# GALLO

#import a QTL annotation file
qtl_UCD1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Animal_QTLdb_release53_cattleARS_UCD1.gff.gz",file_type="gff")

#import a gene annotation file
gene_UDC1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Bos_taurus.ARS-UCD1.2.110.gtf.gz",file_type="gtf")

#FINDING GENES AND QTLs ARROUND THE MARKER

#FINDING GENES
out.genes <- find_genes_qtls_around_markers(db_file= gene_UDC1_2, 
                                            marker_file= gwas, 
                                            method = "gene",
                                            marker = "snp", 
                                            interval = 50000, 
                                            nThreads = NULL)

write.csv(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/keep_all_pheno/windows_10/out_genes_w_05.csv")

#FINDING QTLs

out.qtl <- find_genes_qtls_around_markers(db_file= qtl_UCD1_2, 
                                          marker_file= gwas, 
                                          method = "qtl",
                                          marker = "snp", 
                                          interval = 50000, 
                                          nThreads = NULL)


write.table(out.qtl, file = "/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/keep_all_pheno/windows_10/out_qtl_w_05.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

library(tidyverse)
out.qtl.clean <- select(out.qtl, c("SNP", "rsID", "CHR", "QTL_type", "start_pos", "end_pos","QTL_ID"))
write.csv(out.qtl.clean, file = "/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/keep_all_pheno/windows_10/out_qtl_w_05_clean.csv")

The GALLO output are bellow:

For GENES

X.1 X SNP rsID CHR BP Var chr start_pos end_pos width strand gene_id gene_name gene_biotype
1 1 ARS-BFGL-BAC-28665 rs111010562 24 28487771 0.5601843 24 28485983 28486143 161 + ENSBTAG00000028575 U1 snRNA
2 33 Hapmap54981-rs29019846 rs29019846 24 28516684 0.6150000 24 28485983 28486143 161 + ENSBTAG00000028575 U1 snRNA
3 4 ARS-BFGL-NGS-103753 rs110842922 2 115821065 1.0965189 2 115812583 115843935 31353 - ENSBTAG00000021325 SLC19A3 protein_coding
4 32 Hapmap54770-rs29009608 rs29009608 2 115875702 0.7073805 2 115812583 115843935 31353 - ENSBTAG00000021325 SLC19A3 protein_coding
5 4 ARS-BFGL-NGS-103753 rs110842922 2 115821065 1.0965189 2 115838391 115840022 1632 + ENSBTAG00000007127 NA protein_coding
6 32 Hapmap54770-rs29009608 rs29009608 2 115875702 0.7073805 2 115838391 115840022 1632 + ENSBTAG00000007127 NA protein_coding
7 10 ARS-BFGL-NGS-43721 rs108974471 2 115986085 0.7857140 2 115948258 115951955 3698 + ENSBTAG00000021326 CCL20 protein_coding
8 10 ARS-BFGL-NGS-43721 rs108974471 2 115986085 0.7857140 2 115992779 116028253 35475 + ENSBTAG00000021327 DAW1 protein_coding
9 5 ARS-BFGL-NGS-107330 rs109766798 2 116018639 0.5887867 2 115992779 116028253 35475 + ENSBTAG00000021327 DAW1 protein_coding
10 27 Hapmap41888-BTA-49091 rs41645223 2 115622067 0.8040708 2 115629949 115704725 74777 + ENSBTAG00000021323 AGFG1 protein_coding
11 2 ARS-BFGL-BAC-35548 rs110100182 2 115665427 1.2237598 2 115629949 115704725 74777 + ENSBTAG00000021323 AGFG1 protein_coding
12 18 BTA-49096-no-rs rs41578131 2 115695003 1.5221061 2 115629949 115704725 74777 + ENSBTAG00000021323 AGFG1 protein_coding
13 7 ARS-BFGL-NGS-30337 rs110485060 2 115730530 1.2853203 2 115629949 115704725 74777 + ENSBTAG00000021323 AGFG1 protein_coding
14 7 ARS-BFGL-NGS-30337 rs110485060 2 115730530 1.2853203 2 115758046 115758105 60 + ENSBTAG00000054571 bta-mir-2285ao-1 miRNA
15 4 ARS-BFGL-NGS-103753 rs110842922 2 115821065 1.0965189 2 115791504 115808606 17103 + ENSBTAG00000050771 NA lncRNA
16 30 Hapmap50266-BTA-13664 rs29018622 13 18266073 0.5842979 13 18226732 18280982 54251 - ENSBTAG00000016060 CREM protein_coding
17 29 Hapmap49833-BTA-103929 rs41603335 13 18386942 0.5085677 13 18321888 18393476 71589 + ENSBTAG00000007133 CUL2 protein_coding
18 17 BTA-25900-no-rs rs41575397 13 18509812 0.6018006 13 18449975 18466359 16385 + ENSBTAG00000052242 NA lncRNA
19 17 BTA-25900-no-rs rs41575397 13 18509812 0.6018006 13 18484975 19062784 577810 + ENSBTAG00000014991 PARD3 protein_coding
20 3 ARS-BFGL-BAC-7444 rs110491621 13 18558540 0.6040267 13 18484975 19062784 577810 + ENSBTAG00000014991 PARD3 protein_coding
21 26 BTB-01948148 rs43056622 3 111677167 0.6871511 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
22 28 Hapmap42062-BTA-109789 rs41621207 3 111708236 1.0996405 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
23 25 BTB-01641394 rs42752353 3 111730561 0.8541772 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
24 9 ARS-BFGL-NGS-37809 rs42751504 3 111751663 0.9351573 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
25 6 ARS-BFGL-NGS-25298 rs109868537 3 111772736 1.0283717 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
26 11 ARS-BFGL-NGS-44131 rs110100483 3 111806406 0.8576835 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
27 14 ARS-BFGL-NGS-6202 rs110385521 3 111833768 0.9820334 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
28 16 ARS-BFGL-NGS-97849 rs110553601 3 111965305 0.6104093 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
29 16 ARS-BFGL-NGS-97849 rs110553601 3 111965305 0.6104093 3 111927003 111927727 725 - ENSBTAG00000000335 HMGB4 protein_coding
30 12 ARS-BFGL-NGS-57285 rs109872657 9 10112014 0.5060728 9 9970661 10070666 100006 - ENSBTAG00000020817 B3GAT2 protein_coding
31 15 ARS-BFGL-NGS-97489 rs42111498 27 16546003 0.5266455 27 16420193 16546078 125886 - ENSBTAG00000020657 FAT1 protein_coding
32 20 BTB-00952622 rs42110734 27 16663290 0.5891105 27 16673409 16675767 2359 - ENSBTAG00000050498 NA protein_coding
33 13 ARS-BFGL-NGS-58358 rs109507088 27 16692516 0.5575927 27 16673409 16675767 2359 - ENSBTAG00000050498 NA protein_coding
34 19 BTA-95363-no-rs rs41615088 16 6121692 0.5967890 16 5920408 6165638 245231 - ENSBTAG00000040409 NA protein_coding

FOR QTLs

X SNP rsID CHR QTL_type start_pos end_pos QTL_ID
1 ARS-BFGL-BAC-28665 rs111010562 24 Meat_and_Carcass 28473214 28473218 232857
2 Hapmap54981-rs29019846 rs29019846 24 Meat_and_Carcass 28473214 28473218 232857
3 BTB-01485274 rs42609685 24 Health 28570243 28570247 57038
4 Hapmap58887-rs29013502 rs29013502 24 Health 28570243 28570247 57038
5 BTB-01485274 rs42609685 24 Health 28570243 28570247 57040
6 Hapmap58887-rs29013502 rs29013502 24 Health 28570243 28570247 57040
7 BTB-01485274 rs42609685 24 Production 28570243 28570247 69281
8 Hapmap58887-rs29013502 rs29013502 24 Production 28570243 28570247 69281
9 Hapmap58887-rs29013502 rs29013502 24 Reproduction 28604670 28604674 138598
10 ARS-BFGL-BAC-35548 rs110100182 2 Production 115695001 115695005 283322
11 BTA-49096-no-rs rs41578131 2 Production 115695001 115695005 283322
12 ARS-BFGL-NGS-30337 rs110485060 2 Production 115695001 115695005 283322
13 ARS-BFGL-NGS-103753 rs110842922 2 Milk 115820324 115820328 215425
14 ARS-BFGL-NGS-103753 rs110842922 2 Exterior 115839213 115839217 125900
15 Hapmap54770-rs29009608 rs29009608 2 Exterior 115839213 115839217 125900
16 Hapmap54770-rs29009608 rs29009608 2 Milk 115909335 115909339 155904
17 Hapmap54770-rs29009608 rs29009608 2 Milk 115909359 115909363 155914
18 ARS-BFGL-NGS-43721 rs108974471 2 Production 115986083 115986087 39013
19 ARS-BFGL-NGS-107330 rs109766798 2 Production 115986083 115986087 39013
20 ARS-BFGL-NGS-43721 rs108974471 2 Reproduction 115986083 115986087 39014
21 ARS-BFGL-NGS-107330 rs109766798 2 Reproduction 115986083 115986087 39014
22 ARS-BFGL-NGS-43721 rs108974471 2 Exterior 115986083 115986087 39015
23 ARS-BFGL-NGS-107330 rs109766798 2 Exterior 115986083 115986087 39015
24 ARS-BFGL-NGS-43721 rs108974471 2 Exterior 115986083 115986087 39016
25 ARS-BFGL-NGS-107330 rs109766798 2 Exterior 115986083 115986087 39016
26 ARS-BFGL-NGS-43721 rs108974471 2 Milk 115986083 115986087 39017
27 ARS-BFGL-NGS-107330 rs109766798 2 Milk 115986083 115986087 39017
28 ARS-BFGL-NGS-43721 rs108974471 2 Production 115986083 115986087 39018
29 ARS-BFGL-NGS-107330 rs109766798 2 Production 115986083 115986087 39018
30 ARS-BFGL-NGS-43721 rs108974471 2 Exterior 115986083 115986087 39019
31 ARS-BFGL-NGS-107330 rs109766798 2 Exterior 115986083 115986087 39019
32 ARS-BFGL-NGS-43721 rs108974471 2 Milk 115986083 115986087 39020
33 ARS-BFGL-NGS-107330 rs109766798 2 Milk 115986083 115986087 39020
34 ARS-BFGL-NGS-43721 rs108974471 2 Production 115986083 115986087 39021
35 ARS-BFGL-NGS-107330 rs109766798 2 Production 115986083 115986087 39021
36 ARS-BFGL-NGS-43721 rs108974471 2 Production 115986083 115986087 39022
37 ARS-BFGL-NGS-107330 rs109766798 2 Production 115986083 115986087 39022
38 ARS-BFGL-NGS-43721 rs108974471 2 Milk 115986083 115986087 39023
39 ARS-BFGL-NGS-107330 rs109766798 2 Milk 115986083 115986087 39023
40 ARS-BFGL-NGS-43721 rs108974471 2 Milk 115986083 115986087 39024
41 ARS-BFGL-NGS-107330 rs109766798 2 Milk 115986083 115986087 39024
42 ARS-BFGL-NGS-43721 rs108974471 2 Exterior 115986083 115986087 39025
43 ARS-BFGL-NGS-107330 rs109766798 2 Exterior 115986083 115986087 39025
44 ARS-BFGL-NGS-43721 rs108974471 2 Exterior 115986083 115986087 39026
45 ARS-BFGL-NGS-107330 rs109766798 2 Exterior 115986083 115986087 39026
46 ARS-BFGL-NGS-43721 rs108974471 2 Reproduction 115986083 115986087 39027
47 ARS-BFGL-NGS-107330 rs109766798 2 Reproduction 115986083 115986087 39027
48 ARS-BFGL-NGS-43721 rs108974471 2 Exterior 115986083 115986087 39028
49 ARS-BFGL-NGS-107330 rs109766798 2 Exterior 115986083 115986087 39028
50 Hapmap49833-BTA-103929 rs41603335 13 Milk 18340131 18340135 116582
51 Hapmap49833-BTA-103929 rs41603335 13 Milk 18349683 18349687 116735
52 Hapmap49833-BTA-103929 rs41603335 13 Milk 18370805 18370809 249726
53 Hapmap49833-BTA-103929 rs41603335 13 Milk 18381731 18381735 116525
54 Hapmap49833-BTA-103929 rs41603335 13 Reproduction 18386940 18386944 46808
55 Hapmap49833-BTA-103929 rs41603335 13 Reproduction 18386940 18386944 46809
56 Hapmap49833-BTA-103929 rs41603335 13 Exterior 18386940 18386944 46810
57 Hapmap49833-BTA-103929 rs41603335 13 Exterior 18386940 18386944 46811
58 Hapmap49833-BTA-103929 rs41603335 13 Production 18386940 18386944 46812
59 Hapmap49833-BTA-103929 rs41603335 13 Exterior 18386940 18386944 46813
60 Hapmap49833-BTA-103929 rs41603335 13 Milk 18386940 18386944 46814
61 Hapmap49833-BTA-103929 rs41603335 13 Production 18386940 18386944 46815
62 Hapmap49833-BTA-103929 rs41603335 13 Production 18386940 18386944 46816
63 Hapmap49833-BTA-103929 rs41603335 13 Exterior 18386940 18386944 46817
64 Hapmap49833-BTA-103929 rs41603335 13 Exterior 18386940 18386944 46818
65 Hapmap49833-BTA-103929 rs41603335 13 Exterior 18386940 18386944 46819
66 Hapmap49833-BTA-103929 rs41603335 13 Reproduction 18386940 18386944 46820
67 Hapmap49833-BTA-103929 rs41603335 13 Reproduction 18386940 18386944 46821
68 Hapmap49833-BTA-103929 rs41603335 13 Exterior 18386940 18386944 46822
69 Hapmap49833-BTA-103929 rs41603335 13 Meat_and_Carcass 18386940 18386944 151596
70 Hapmap49833-BTA-103929 rs41603335 13 Meat_and_Carcass 18392222 18392226 226401
71 Hapmap49833-BTA-103929 rs41603335 13 Meat_and_Carcass 18392222 18392226 228662
72 Hapmap49833-BTA-103929 rs41603335 13 Meat_and_Carcass 18392222 18392226 234093
73 Hapmap49833-BTA-103929 rs41603335 13 Milk 18406821 18406825 116526
74 BTA-25900-no-rs rs41575397 13 Reproduction 18489668 18489672 46823
75 BTA-25900-no-rs rs41575397 13 Exterior 18489668 18489672 46824
76 BTA-25900-no-rs rs41575397 13 Exterior 18489668 18489672 46825
77 BTA-25900-no-rs rs41575397 13 Milk 18489668 18489672 46826
78 BTA-25900-no-rs rs41575397 13 Production 18489668 18489672 46827
79 BTA-25900-no-rs rs41575397 13 Exterior 18489668 18489672 46828
80 BTA-25900-no-rs rs41575397 13 Exterior 18489668 18489672 46829
81 BTA-25900-no-rs rs41575397 13 Production 18489668 18489672 46830
82 BTA-25900-no-rs rs41575397 13 Production 18489668 18489672 46831
83 BTA-25900-no-rs rs41575397 13 Exterior 18489668 18489672 46832
84 BTA-25900-no-rs rs41575397 13 Exterior 18489668 18489672 46833
85 BTA-25900-no-rs rs41575397 13 Reproduction 18489668 18489672 46834
86 BTA-25900-no-rs rs41575397 13 Reproduction 18489668 18489672 46835
87 BTA-25900-no-rs rs41575397 13 Exterior 18489668 18489672 46836
88 BTA-25900-no-rs rs41575397 13 Reproduction 18509810 18509814 46837
89 ARS-BFGL-BAC-7444 rs110491621 13 Reproduction 18509810 18509814 46837
90 BTA-25900-no-rs rs41575397 13 Reproduction 18509810 18509814 46838
91 ARS-BFGL-BAC-7444 rs110491621 13 Reproduction 18509810 18509814 46838
92 BTA-25900-no-rs rs41575397 13 Exterior 18509810 18509814 46839
93 ARS-BFGL-BAC-7444 rs110491621 13 Exterior 18509810 18509814 46839
94 BTA-25900-no-rs rs41575397 13 Exterior 18509810 18509814 46840
95 ARS-BFGL-BAC-7444 rs110491621 13 Exterior 18509810 18509814 46840
96 BTA-25900-no-rs rs41575397 13 Production 18509810 18509814 46841
97 ARS-BFGL-BAC-7444 rs110491621 13 Production 18509810 18509814 46841
98 BTA-25900-no-rs rs41575397 13 Exterior 18509810 18509814 46842
99 ARS-BFGL-BAC-7444 rs110491621 13 Exterior 18509810 18509814 46842
100 BTA-25900-no-rs rs41575397 13 Exterior 18509810 18509814 46843
101 ARS-BFGL-BAC-7444 rs110491621 13 Exterior 18509810 18509814 46843
102 BTA-25900-no-rs rs41575397 13 Production 18509810 18509814 46844
103 ARS-BFGL-BAC-7444 rs110491621 13 Production 18509810 18509814 46844
104 BTA-25900-no-rs rs41575397 13 Production 18509810 18509814 46845
105 ARS-BFGL-BAC-7444 rs110491621 13 Production 18509810 18509814 46845
106 BTA-25900-no-rs rs41575397 13 Exterior 18509810 18509814 46846
107 ARS-BFGL-BAC-7444 rs110491621 13 Exterior 18509810 18509814 46846
108 BTA-25900-no-rs rs41575397 13 Exterior 18509810 18509814 46847
109 ARS-BFGL-BAC-7444 rs110491621 13 Exterior 18509810 18509814 46847
110 BTA-25900-no-rs rs41575397 13 Exterior 18509810 18509814 46848
111 ARS-BFGL-BAC-7444 rs110491621 13 Exterior 18509810 18509814 46848
112 BTA-25900-no-rs rs41575397 13 Exterior 18509810 18509814 46849
113 ARS-BFGL-BAC-7444 rs110491621 13 Exterior 18509810 18509814 46849
114 BTA-25900-no-rs rs41575397 13 Reproduction 18509810 18509814 46850
115 ARS-BFGL-BAC-7444 rs110491621 13 Reproduction 18509810 18509814 46850
116 BTA-25900-no-rs rs41575397 13 Health 18509810 18509814 46851
117 ARS-BFGL-BAC-7444 rs110491621 13 Health 18509810 18509814 46851
118 BTA-25900-no-rs rs41575397 13 Reproduction 18509810 18509814 46852
119 ARS-BFGL-BAC-7444 rs110491621 13 Reproduction 18509810 18509814 46852
120 BTA-25900-no-rs rs41575397 13 Exterior 18509810 18509814 46853
121 ARS-BFGL-BAC-7444 rs110491621 13 Exterior 18509810 18509814 46853
122 BTA-25900-no-rs rs41575397 13 Reproduction 18558538 18558542 46854
123 ARS-BFGL-BAC-7444 rs110491621 13 Reproduction 18558538 18558542 46854
124 BTA-25900-no-rs rs41575397 13 Exterior 18558538 18558542 46855
125 ARS-BFGL-BAC-7444 rs110491621 13 Exterior 18558538 18558542 46855
126 BTA-25900-no-rs rs41575397 13 Exterior 18558538 18558542 46856
127 ARS-BFGL-BAC-7444 rs110491621 13 Exterior 18558538 18558542 46856
128 BTA-25900-no-rs rs41575397 13 Milk 18558538 18558542 46857
129 ARS-BFGL-BAC-7444 rs110491621 13 Milk 18558538 18558542 46857
130 BTA-25900-no-rs rs41575397 13 Exterior 18558538 18558542 46858
131 ARS-BFGL-BAC-7444 rs110491621 13 Exterior 18558538 18558542 46858
132 BTA-25900-no-rs rs41575397 13 Milk 18558538 18558542 46859
133 ARS-BFGL-BAC-7444 rs110491621 13 Milk 18558538 18558542 46859
134 BTA-25900-no-rs rs41575397 13 Production 18558538 18558542 46860
135 ARS-BFGL-BAC-7444 rs110491621 13 Production 18558538 18558542 46860
136 BTA-25900-no-rs rs41575397 13 Production 18558538 18558542 46861
137 ARS-BFGL-BAC-7444 rs110491621 13 Production 18558538 18558542 46861
138 BTA-25900-no-rs rs41575397 13 Milk 18558538 18558542 46862
139 ARS-BFGL-BAC-7444 rs110491621 13 Milk 18558538 18558542 46862
140 BTA-25900-no-rs rs41575397 13 Exterior 18558538 18558542 46863
141 ARS-BFGL-BAC-7444 rs110491621 13 Exterior 18558538 18558542 46863
142 BTA-25900-no-rs rs41575397 13 Exterior 18558538 18558542 46864
143 ARS-BFGL-BAC-7444 rs110491621 13 Exterior 18558538 18558542 46864
144 BTA-25900-no-rs rs41575397 13 Reproduction 18558538 18558542 46865
145 ARS-BFGL-BAC-7444 rs110491621 13 Reproduction 18558538 18558542 46865
146 BTA-25900-no-rs rs41575397 13 Reproduction 18558538 18558542 46866
147 ARS-BFGL-BAC-7444 rs110491621 13 Reproduction 18558538 18558542 46866
148 BTA-25900-no-rs rs41575397 13 Exterior 18558538 18558542 46867
149 ARS-BFGL-BAC-7444 rs110491621 13 Exterior 18558538 18558542 46867
150 BTA-25900-no-rs rs41575397 13 Exterior 18558538 18558542 46868
151 ARS-BFGL-BAC-7444 rs110491621 13 Exterior 18558538 18558542 46868
152 BTB-01948148 rs43056622 3 Reproduction 111708234 111708238 30008
153 Hapmap42062-BTA-109789 rs41621207 3 Reproduction 111708234 111708238 30008
154 BTB-01641394 rs42752353 3 Reproduction 111708234 111708238 30008
155 ARS-BFGL-NGS-37809 rs42751504 3 Reproduction 111708234 111708238 30008
156 BTB-01948148 rs43056622 3 Reproduction 111708234 111708238 30244
157 Hapmap42062-BTA-109789 rs41621207 3 Reproduction 111708234 111708238 30244
158 BTB-01641394 rs42752353 3 Reproduction 111708234 111708238 30244
159 ARS-BFGL-NGS-37809 rs42751504 3 Reproduction 111708234 111708238 30244
160 BTB-01948148 rs43056622 3 Meat_and_Carcass 111708234 111708238 152258
161 Hapmap42062-BTA-109789 rs41621207 3 Meat_and_Carcass 111708234 111708238 152258
162 BTB-01641394 rs42752353 3 Meat_and_Carcass 111708234 111708238 152258
163 ARS-BFGL-NGS-37809 rs42751504 3 Meat_and_Carcass 111708234 111708238 152258
164 BTB-01948148 rs43056622 3 Meat_and_Carcass 111717521 111717525 225527
165 Hapmap42062-BTA-109789 rs41621207 3 Meat_and_Carcass 111717521 111717525 225527
166 BTB-01641394 rs42752353 3 Meat_and_Carcass 111717521 111717525 225527
167 ARS-BFGL-NGS-37809 rs42751504 3 Meat_and_Carcass 111717521 111717525 225527
168 ARS-BFGL-NGS-97849 rs110553601 3 Meat_and_Carcass 111991214 111991218 151880
169 ARS-BFGL-NGS-97849 rs110553601 3 Meat_and_Carcass 111991214 111991218 152096
170 ARS-BFGL-NGS-34854 rs109738387 23 Reproduction 14164327 14164331 52030
171 ARS-BFGL-NGS-34854 rs109738387 23 Production 14164327 14164331 52031
172 ARS-BFGL-NGS-34854 rs109738387 23 Meat_and_Carcass 14168378 14168382 229449
173 ARS-BFGL-NGS-34854 rs109738387 23 Reproduction 14185499 14185503 52032
174 ARS-BFGL-NGS-34854 rs109738387 23 Production 14185499 14185503 52033
175 ARS-BFGL-NGS-34854 rs109738387 23 Reproduction 14208469 14208473 52034
176 ARS-BFGL-NGS-34854 rs109738387 23 Production 14208469 14208473 52035
177 ARS-BFGL-NGS-97489 rs42111498 27 Milk 16565442 16565446 70892
178 ARS-BFGL-NGS-97489 rs42111498 27 Milk 16578044 16578048 70788
179 ARS-BFGL-NGS-97489 rs42111498 27 Reproduction 16579768 16579772 53392
180 ARS-BFGL-NGS-97489 rs42111498 27 Exterior 16579768 16579772 53393
181 ARS-BFGL-NGS-97489 rs42111498 27 Reproduction 16579768 16579772 53394
182 ARS-BFGL-NGS-97489 rs42111498 27 Exterior 16579768 16579772 53395
183 ARS-BFGL-NGS-97489 rs42111498 27 Milk 16579768 16579772 53396
184 ARS-BFGL-NGS-97489 rs42111498 27 Milk 16579768 16579772 53397
185 ARS-BFGL-NGS-97489 rs42111498 27 Milk 16579768 16579772 53398
186 ARS-BFGL-NGS-97489 rs42111498 27 Production 16579768 16579772 53399
187 ARS-BFGL-NGS-97489 rs42111498 27 Production 16579768 16579772 53400
188 ARS-BFGL-NGS-97489 rs42111498 27 Milk 16579768 16579772 53401
189 ARS-BFGL-NGS-97489 rs42111498 27 Milk 16579768 16579772 53402
190 ARS-BFGL-NGS-97489 rs42111498 27 Exterior 16579768 16579772 53403
191 ARS-BFGL-NGS-97489 rs42111498 27 Reproduction 16579768 16579772 53404
192 ARS-BFGL-NGS-97489 rs42111498 27 Reproduction 16579768 16579772 53405
193 ARS-BFGL-NGS-97489 rs42111498 27 Exterior 16579768 16579772 53406
194 ARS-BFGL-NGS-97489 rs42111498 27 Exterior 16579768 16579772 53407
195 BTB-00952622 rs42110734 27 Milk 16616482 16616486 70382
196 BTB-00952622 rs42110734 27 Milk 16617996 16618000 70383
197 BTB-00952622 rs42110734 27 Milk 16669779 16669783 70769
198 ARS-BFGL-NGS-58358 rs109507088 27 Milk 16669779 16669783 70769
199 BTA-95363-no-rs rs41615088 16 Production 6121690 6121694 68846
200 BTB-01069210 rs42225053 6 Meat_and_Carcass 9150592 9150596 231667
201 BTB-01608944 rs42723390 15 Reproduction 10936000 10936004 47797
202 BTB-01608944 rs42723390 15 Reproduction 10936000 10936004 47798
203 BTB-01608944 rs42723390 15 Exterior 10936000 10936004 47799
204 BTB-01608944 rs42723390 15 Milk 10936000 10936004 47800
205 BTB-01608944 rs42723390 15 Milk 10936000 10936004 47801
206 BTB-01608944 rs42723390 15 Production 10936000 10936004 47802
207 BTB-01608944 rs42723390 15 Production 10936000 10936004 47803
208 BTB-01608944 rs42723390 15 Milk 10936000 10936004 47804
209 BTB-01608944 rs42723390 15 Milk 10936000 10936004 47805
210 BTB-01608944 rs42723390 15 Reproduction 10936000 10936004 47806
211 BTB-01608944 rs42723390 15 Health 10936000 10936004 47807
212 BTB-01608944 rs42723390 15 Reproduction 10936000 10936004 47808
213 BTB-01608944 rs42723390 15 Exterior 10936000 10936004 47809
214 BTB-01608944 rs42723390 15 Exterior 10936000 10936004 47810
215 BTB-01608944 rs42723390 15 Exterior 10936000 10936004 47811
216 BTB-01608944 rs42723390 15 Reproduction 10936000 10936004 281490

QTL Plots okok

# Set working directory
setwd("/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/keep_all_pheno/windows_10")

#QTL type plot
oldpar <- par(mar=c(5,15,0.5,1))
plot_qtl_info(out.qtl, qtl_plot = "qtl_type", cex=1.5)

#QTL names plot (by type)

# Set the width and height in pixels for 600 DPI
png("qtl_names_w05.png", width=5100, height=6600, res=600)  # Width and height in pixels
# Set layout for multiple plots
par(mfrow=c(6, 1), mar=c(2, 20, 1, 1))
# List of QTL classes for titles
qtl_classes <- c("Production", "Reproduction", "Milk", "Meat_and_Carcass", "Health", "Exterior")
# Loop through each QTL class and plot
for (qtl_class in qtl_classes) {
  plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class=qtl_class)
  title(main=qtl_class)  # Add the QTL class as the main title for each plot
}
# Close the device
dev.off()


#QTL enrichment analysis 

out.enrich_qtl_name <-qtl_enrich(qtl_db= qtl_UCD1_2, 
                                 qtl_file= out.qtl, qtl_type = "Name",
                                 enrich_type = "genome", chr.subset = NULL, 
                                 padj = "fdr",nThreads = 2)


# Sorting the dataframe in ascending order of adj.pval
sorted_df <- out.enrich_qtl_name[order(out.enrich_qtl_name$adj.pval), ]
write.csv(sorted_df,"/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/keep_all_pheno/windows_10/out_enrich_qtl_genome_name_w05.csv")


out.enrich_qtl_type <-qtl_enrich(qtl_db= qtl_UCD1_2, 
                                 qtl_file= out.qtl, qtl_type = "QTL_type",
                                 enrich_type = "genome", chr.subset = NULL, 
                                 padj = "fdr",nThreads = 2)

sorted_df_type <- out.enrich_qtl_type[order(out.enrich_qtl_type$adj.pval), ]
write.csv(out.enrich_qtl_type,"/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/keep_all_pheno/windows_10/out_enrich_qtl_genome_type_w05.csv")


#Plots

#Name
#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_name$ID<-paste(out.enrich_qtl_name$QTL, out.enrich_qtl_name$CHR,sep="")
#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered<-out.enrich_qtl_name[which(out.enrich_qtl_name$adj.pval<0.05),]
#Plotting the enrichment results for the QTL enrichment analysis
dev.off()
QTLenrich_plot(out.enrich.filtered, x="ID", pval="adj.pval")


#Type
#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_type$ID<-paste(out.enrich_qtl_type$QTL, out.enrich_qtl_type$CHR,sep="")
#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered_type<-out.enrich_qtl_type[which(out.enrich_qtl_type$adj.pval<0.05),]
#Plotting the enrichment results for the QTL enrichment analysis
dev.off()
QTLenrich_plot(out.enrich.filtered_type, x="ID", pval="adj.pval")

QTL type My Image

QTL name by type My Image

12.0.0.1 Windows - QTL enrichment on GALLO

QTL Enrichment outcomes

Enrichment by name (enrichment analysis will be performed for each trait individually)

X QTL N_QTLs N_QTLs_db Total_annotated_QTLs Total_QTLs_db pvalue adj.pval QTL_type
36 Stillbirth 11 1363 148 163224 0.0000001 0.0000027 Reproduction
10 Foot angle 7 672 148 163224 0.0000031 0.0000472 Exterior
31 Rear leg placement - side view 6 430 148 163224 0.0000031 0.0000472 Exterior
6 Calving ease 14 3819 148 163224 0.0000113 0.0001276 Reproduction
15 Length of productive life 10 2004 148 163224 0.0000168 0.0001509 Production
27 Net merit 7 903 148 163224 0.0000211 0.0001581 Production
30 Rear leg placement - rear view 5 491 148 163224 0.0000937 0.0006021 Exterior
20 Milk potassium content 5 570 148 163224 0.0001870 0.0010521 Milk
9 Feet and leg conformation 5 627 148 163224 0.0002895 0.0014476 Exterior
44 Udder depth 5 695 148 163224 0.0004620 0.0020789 Exterior
29 PTA type 4 627 148 163224 0.0026752 0.0109395 Production
37 Strength 4 664 148 163224 0.0032812 0.0109395 Exterior
39 Teat placement - front 3 327 148 163224 0.0034034 0.0109395 Exterior
42 Udder attachment 4 655 148 163224 0.0031260 0.0109395 Exterior
11 Head width 1 6 148 163224 0.0054281 0.0162844 Production
32 Respiratory rate 1 9 148 163224 0.0081312 0.0228691 Health
33 Rump conformation 1 13 148 163224 0.0117240 0.0310341 Exterior
3 Body temperature 1 23 148 163224 0.0206495 0.0516237 Health
23 Milk tridecylic acid content 2 319 148 163224 0.0343560 0.0801886 Milk
25 Muscle calcium content 1 40 148 163224 0.0356394 0.0801886 Meat and Carcass
26 Muscle sodium content 1 56 148 163224 0.0495393 0.1061556 Meat and Carcass
45 Udder height 2 504 148 163224 0.0771386 0.1577835 Exterior
14 Intramuscular fat 1 117 148 163224 0.1007311 0.1970826 Meat and Carcass
1 Age at first calving 1 233 148 163224 0.1906422 0.3558427 Reproduction
19 Milk glycosylated kappa-casein percentage 4 2527 148 163224 0.1976904 0.3558427 Milk
28 Pregnancy rate 2 944 148 163224 0.2112679 0.3656560 Reproduction
16 Marbling score 3 1817 148 163224 0.2284300 0.3699997 Meat and Carcass
38 Teat length 1 300 148 163224 0.2384442 0.3699997 Exterior
40 Teat placement - rear 1 298 148 163224 0.2370588 0.3699997 Exterior
13 Interval to first estrus after calving 2 1053 148 163224 0.2475498 0.3713247 Reproduction
35 Somatic cell score 2 1122 148 163224 0.2706460 0.3928732 Health
22 Milk protein yield 4 3093 148 163224 0.3086197 0.4339965 Milk
12 Interval from first to last insemination 1 445 148 163224 0.3325080 0.4534200 Reproduction
43 Udder cleft 1 477 148 163224 0.3516591 0.4654311 Exterior
8 Dairy form 1 514 148 163224 0.3731230 0.4797296 Exterior
2 Body depth 1 616 148 163224 0.4287078 0.5358848 Production
5 Body weight gain 1 1354 148 163224 0.7086966 0.8619283 Production
34 Shear force 2 2954 148 163224 0.7503609 0.8885853 Meat and Carcass
41 Tenderness score 2 3483 148 163224 0.8265004 0.9536543 Meat and Carcass
18 Milk fat yield 5 8220 148 163224 0.8711850 0.9800832 Milk
4 Body weight 1 4289 148 163224 0.9806056 0.9974015 Production
7 Connective tissue amount 1 3142 148 163224 0.9437546 0.9974015 Meat and Carcass
17 Milk fat percentage 6 10941 148 163224 0.9364537 0.9974015 Milk
21 Milk protein percentage 4 8803 148 163224 0.9610298 0.9974015 Milk
24 Milk yield 1 6432 148 163224 0.9974015 0.9974015 Milk

My Image

Enrichment by QTL_type (enrichment processes performed for the QTL classes)

X QTL N_QTLs N_QTLs_db Total_annotated_QTLs Total_QTLs_db pvalue adj.pval
1 Exterior 46 9077 148 163224 0.0000000 0.0000000
2 Health 4 5889 148 163224 0.7848136 1.0000000
3 Meat and Carcass 11 18258 148 163224 0.9498647 1.0000000
4 Milk 31 75352 148 163224 1.0000000 1.0000000
5 Production 25 19640 148 163224 0.0502520 0.1507559
6 Reproduction 31 35008 148 163224 0.5912798 1.0000000

My Image

13 g:PROFILER - 0.5% Windows - BLUPF90+

This output are in this directory: /home/bambrozi/2_CORTISOL/GPROFILER/BLUPF90_GWAS/keep_all_pheno_windows

Bellow the data inserted in g:PROFILER My Image

Bellow we can find the output for g:Profiler online

My Image

source term_name term_id highlighted adjusted_p_value negative_log10_of_adjusted_p_value term_size query_size intersection_size effective_domain_size intersections
GO:BP establishment or maintenance of apical/basal cell polarity GO:0035088 true 0.0235367 1.628255 42 9 2 16190 ENSBTAG00000014991,ENSBTAG00000020657
GO:BP establishment or maintenance of epithelial cell apical/basal polarity GO:0045197 false 0.0235367 1.628255 38 9 2 16190 ENSBTAG00000014991,ENSBTAG00000020657
GO:BP establishment or maintenance of bipolar cell polarity GO:0061245 false 0.0235367 1.628255 42 9 2 16190 ENSBTAG00000014991,ENSBTAG00000020657
GO:BP establishment of epithelial cell apical/basal polarity involved in camera-type eye morphogenesis GO:0003412 true 0.0419704 1.377057 1 9 1 16190 ENSBTAG00000020657
GO:CC SCF ubiquitin ligase complex GO:0019005 true 0.0317252 1.498595 56 10 2 17218 ENSBTAG00000021327,ENSBTAG00000007133
GO:CC PAR polarity complex GO:0120157 true 0.0400534 1.397361 3 10 1 17218 ENSBTAG00000014991
GO:CC ATF4-CREB1 transcription factor complex GO:1990589 true 0.0400534 1.397361 3 10 1 17218 ENSBTAG00000016060
REAC Tight junction interactions REAC:R-BTA-420029 false 0.0451867 1.344989 6 4 1 8225 ENSBTAG00000014991
REAC Vitamin B1 (thiamin) metabolism REAC:R-BTA-196819 false 0.0451867 1.344989 5 4 1 8225 ENSBTAG00000021325

14 Heritability estimation - BLUPF90

14.1 Files preparation

Preparing files to run Variance components estimation using REML with AI (Average Information) algorithm.

First you need to create a directory in your home directory, prepare and save the following files in:

  • Phenotype and Fixed effects file
  • Pedigree file
  • Genotype file
  • BlupF90+ executable file
  • RenumF90 executable file
  • Parameter file

      14.1.1 Phenotype and Fixed effects file

      The appearance of this file is like this:

      My Image

      FIRST COLUMN = Animal ID SECOND COLUMN = Phenotype THIRD COLUMN = Fixed Effect 1 FOURTH COLUMN = Fixed Effect 2

      To get in one file these four columns we need the following code:

      #File with equivalence among different ids
      eq_ids <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv")
      
      # Genotype file with cid
      geno <- read.table("/home/bambrozi/2_CORTISOL/Geno_files/genoplink.ped")
      
      # Phenotipic file and fixed effects
      data_final <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits2.csv")
      
      # creating a pheno file with only ID, Cortisol, BY and Sam date columns
      pheno <- data_final %>%
        select(ID, T4Cortisol, BIRTH_YEAR, Sampling_date)
      
      # Create a new column iid and and bring the iid from eq_ids to geno file
      pheno$iid <- eq_ids$iid[match(pheno$ID, eq_ids$elora_id)]
      
      # organizing columns sequence and keep only iid
      pheno <- pheno%>%
        select(iid, T4Cortisol, BIRTH_YEAR, Sampling_date)
      
      # Create a new column geno$iid, and bring the iid from eq_ids to geno file
      geno$iid <- eq_ids$iid[match(geno$V2, eq_ids$cdn_id)]
      
      # organizing the columns sequence
      library(dplyr)
      geno <- geno %>%
        select(V1, V2, iid, everything())
      
      # Keeping in the pheno file only the rows present also in geno file
      pheno <- pheno %>%
        filter(iid %in% geno$iid)
      
      write.table(pheno, "/home/bambrozi/2_CORTISOL/Heritability_BLUPF90/pheno_fix_eff.txt", sep = " ", col.names = FALSE, row.names = FALSE, quote = FALSE)

      The file should be saved as text file, with separation by space and no columns names.

      14.1.2 Pedigree file

      The appearance of this file is like this:

      My Image

      FIRST COLUMN = Animal ID SECOND COLUMN = Sire ID THIRD COLUMN = Dam ID

      The file should be saved as text file, with separation by space and no columns names.

      We used the code below to remove the commas of a .csv file to a file with sepation by spaces.

      # to replace comma for space in the .csv file with the equivalence among IDs
      sed -i 's/,/ /g' bruno_ids.csv

      14.1.3 Genotype file

      The appearance of this file is like this:

      My Image

      FIRST COLUMN = Animal ID SECOND COLUMN = Genotypes (0, 1 and 2 format)

      The file should be saved as text file, with separation by space and no columns names.

      We used the code below to replace the cid for iid. First we merge using the second column of the firs file, and the first column of the second file. Then we use again the command awk to keep only the third and fifth columsn and sabe in a different object.

      # Using the awk function to merge the two files and the second awk to select only the 3rd and 5fh columns
      awk 'FNR==NR {a[$2]=$0; next} {print a[$1], $0}' bruno_ids.csv bruno_gntps.txt | awk '{print$3,$5}' > bruno_gntps_iid

      14.1.4 Download the executable files

      Download from this website https://nce.ads.uga.edu/html/projects/programs/Linux/64bit/:
      • BlupF90+
      • renumF90

      14.1.5 Parameter file

      The appearance of this file is like this:

      My Image

      • DATAFILE: bellow this line you need to inform the name of the file with phenotype and fixed effects. As before running BLUPF90 on server you are going to direct the terminal to the directory where all these files are placed you only need to inform the name.
      • TRAITS: below this line you need to inform which column are the phenotype date in the previous file, in this example, 2.
      • FIELDS_PASSED TO OUTPUT:
      • WEIGHT (S):
      • RESIDUAL VARIANCE: for the firs run you need to inform the value of 1.0, for the second you can pick the variance from the firs run’s output.
      • EFFECT: you will inform your first effect, in this example, Birth Year, which is in the column 3, and the word cross numer because is a number.
      • EFFECT: you should provide the next effect, in this example, sample date, as sample date has one non numeric character you should inform as cross alpha, in this example column 4.
      • EFFECT: now I’m providing my animal ID information, in this example column 1, and again cross alpha because has number and letters in the ID. I’m also informing that this effect is RANDOM, and that is my animal effect.
      • FILE: bellow this line I need to provide the pedigree file. Again, as I’m already in the directory which contain the pedigree file I only need to provide the file name.
      • FILE-POS: Here I’ll inform which columns should be considered in the pedigree file, in this situation, 1 2 3 0 0.
      • PED_DEPTH: Now we can inform the depth we want the software considers the pedigree, or if we leave 0 it will the maximum possible.
      • (CO) VARIANCES: Here you should provide the Variance/Co-variance matrix, like as for residual variance in the first run we set up to 0 in this example that we don´t have to imagine any co-variance, but if you know that exist variance among you effects you shoul set up XXX for ….
      • OPTION method: VCE (Variance Component Estimation).
      • OPTION OrigID: this will keep the original ID informed.
      • OPTION missing 9999: you are informing that missing values will appear as 9999
      • OPTION se_covar_function: H2_1 g_3_3_1_1/(g_3_3_1_1+r_1_1)
        • H2_1: the name that your function will appear on the output files.
        • g_3_3: you are asking for genetic variance estimation for the 3rd informed effect.
        • **_1_1**: this effect is in the 1st column.
        • /(g_3_3_1_1+r_1_1): to get the total phenotipic variance, you are summing to genetic variance the residual variance of the effect in column 1.

14.2 Running renumF90 and BlupF90+

  1. Go to the server you wanna run this analysis, for instance, grand

  2. Now go to the directory you have created to run this analysis where that set of files are placed.

ssh grand
  1. Make the renumF90 and BlupF90+ files executables
chmod +x renumf90
chmod +x blupf90+
  1. Run renumF90
./renumf90

When you run the code above, it will as you the name of your parameter card.

renumF90 will generate a new parameter card called renf90.par

  1. Run blupf90+
./blupf90+

blupf90+ will ask you for parameter’s card name, now you should provide with the new one renf90.par

blupF90+ will generate the blupf90.log file with the results.

My Image

Now you should update you renf90.par file with these informations from the .log file

Copy Residual Variance from blupf90.log and will paste on renf90.par RANDOM_RESIDUAL_VALUES Copy Genetic variance for effect x from blupf90.log and will paste on renf90.par (CO) VARIANCE

  1. 2nd blupf90+ run
./blupf90+

blupf90+ will ask you for parameter’s card name, now you should provide with the UPDATED renf90.par

If the Residual Variance and Genetic variance for effect x didn’t change in your blupf90.log the analysis ended, but if this value vary, you should update again the renf90.par and run again blupf90+ until this values don’t change more.

14.3 Running renumF90 and BlupF90+ adding GENOTYPES

The previous analysis considered only the pedigree, but now we can insert the genotype information. To perform this you need a new diretory called Blup_Genomic inside your previously created directory.

Now you need add the reference for your genotype file in your previous parameter file renum.par and save in this new sub-directory.

The highlighted text show the added part.

My Image

Go to the sub-diretory

Note that as you are in the subdirectory, but your phenotype and fixed effect, pedigree and genotype files are still in the previous directory you need to add the highlighted part to inform the correct location

My Image

To run the renumf90 and blupf90+ you also need to add ../ to correct specify the location. Run renumF90

./../renumf90

Run blupf90+

./../blupf90+

The steps for run, update parameter card, re-run are the same.

14.4 Results

We have 2 different output files

  1. Variance components: blupf90.log

My Image

In this file we can find the heritabilit (SD) and for instance the convergence (similarity)

  1. Solutions: solutions.orig
    In this file we will find the solutions (results) for each effect

In our example:

  • EFFECT 1: Birth Year, has 4 levels (2018, 2019, 2020 and 2021), and the solution that for this fixed effect is how much each level add.
  • EFFECT 2: Sampling date, has 23 levels, and the solutions
  • EFFECT 3: Animal random effect, has one for each animal and it is the EBV or GEBV.

    My Image

15 Additional analysis

15.0.1 GEBVs Average and SD

I calculated the average and SD for the GEBVs

GEBVs <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/solutions.orig", header = T)

equiv_id <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv")

mer <- merge(equiv_id, GEBVs, by.x = "iid", by.y = "original_id")

cortisol  <- read.csv("/home/bambrozi/2_CORTISOL/Data/data_clean.csv")

output <- merge(cortisol, mer, by.x = "ID", by.y = "elora_id")

colnames(output)

output <- output[, c("ID", "iid", "cdn_id", "T4Cortisol", "solution")]

mean_solution <- mean(output$solution, na.rm = TRUE)
sd_solution <- sd(output$solution, na.rm = TRUE)

mean_solution
sd_solution

library(ggplot2)

# Create histogram with density curve, mean, and SD lines
ggplot(output, aes(x = solution)) +
  geom_histogram(aes(y = ..density..), fill = "lightblue", color = "black", bins = 30, alpha = 0.6) +
  geom_density(color = "darkred", size = 1) +  # Density curve
  geom_vline(aes(xintercept = mean_solution), color = "red", linetype = "dashed", size = 1) +
  geom_vline(aes(xintercept = mean_solution + sd_solution), color = "blue", linetype = "dashed", size = 1) +
  geom_vline(aes(xintercept = mean_solution - sd_solution), color = "blue", linetype = "dashed", size = 1) +
  labs(title = "Histogram and Density Curve of GEBVs", x = "Solution", y = "Density") +
  theme_minimal()


# Subset for values BELOW 1 SD from the mean
output_low <- output %>% 
  filter(solution < (mean_solution - sd_solution))

# Subset for values ABOVE 1 SD from the mean
output_high <- output %>% 
  filter(solution > (mean_solution + sd_solution))

# Subset for values WITHIN 1 SD from the mean (intermediary)
output_intermediary <- output %>% 
  filter(solution >= (mean_solution - sd_solution) & solution <= (mean_solution + sd_solution))

write.csv(output, "/home/bambrozi/2_CORTISOL/GEBVs_subsets/all.csv")
write.csv(output_low, "/home/bambrozi/2_CORTISOL/GEBVs_subsets/low.csv")
write.csv(output_high, "/home/bambrozi/2_CORTISOL/GEBVs_subsets/high.csv")
write.csv(output_intermediary, "/home/bambrozi/2_CORTISOL/GEBVs_subsets/intermediary.csv")

mean_solution <- round(mean(output$solution, na.rm = TRUE), 2)
sd_solution <- round(sd(output$solution, na.rm = TRUE), 2)


output$Category <- ifelse(output$T4Cortisol >= 956, "H", 
                            ifelse(output_low <= 190.8, "L", "M"))

# Create the plot
ggplot(output, aes(x = T4Cortisol, y = solution)) +
  geom_point(size = 2, color = "black") +  # Smaller scatter plot points
  labs(x = "T4 Cortisol (pg/mL)", y = "GEBV", title = "T4 Cortisol vs GEBV") +
  geom_hline(yintercept = mean_solution, color = "blue", linetype = "dashed", size = 1) +  # Mean line
  geom_hline(yintercept = mean_solution + sd_solution, color = "red", linetype = "dashed", size = 1) +  # +1SD line
  geom_hline(yintercept = mean_solution - sd_solution, color = "red", linetype = "dashed", size = 1) +  # -1SD line
  geom_text(aes(x = max(output$T4Cortisol), y = mean_solution, label = "Mean"), color = "blue", vjust = -0.5, hjust = 1) +  # Label for mean
  geom_text(aes(x = max(output$T4Cortisol), y = mean_solution + sd_solution, label = "+1SD"), color = "red", vjust = -0.5, hjust = 1) +  # Label for +1SD
  geom_text(aes(x = max(output$T4Cortisol), y = mean_solution - sd_solution, label = "-1SD"), color = "red", vjust = -0.5, hjust = 1) +  # Label for -1SD
  theme_minimal() +
  theme(
    axis.line = element_line(color = "black"), 
    panel.grid.major = element_line(color = "gray80", linewidth = 0.5), 
    panel.grid.minor = element_line(color = "gray90", linewidth = 0.25), 
    legend.title = element_text(size = 10),
    legend.position = "right"
  )

15.0.2 Age at sampling date

Merg_Cort_GEBVs <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/Merged_Cortisol_GEBVs.csv")

#Opening the file with the GEBVs columns to use
Columns_to_use <- readLines("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/traits_to_use.txt")

colnames(Merg_Cort_GEBVs)[405] <- "IDD"

data <- select(Merg_Cort_GEBVs, ID, T4Cortisol, BIRTH_YEAR, BIRTH_DATE, all_of(Columns_to_use))

samp_date2 <- read.csv("/home/bambrozi/2_CORTISOL/Data/Elora animal_ids_kl_sampling_date.csv")

# Convert Sampling_date to Date using as.Date
samp_date2$Sampling_date <- as.Date(samp_date2$Sampling_date, format = "%m/%d/%Y")

table(samp_date2$Sampling_date)

samp_date <- select(samp_date2, Elora_id, Sampling_date)

# Check if data$ID and samp_dates$elora_id are identical in values and order
identical(data$ID, samp_date$Elora_id)

data_final <- merge(data, samp_date, by.x="ID", by.y="Elora_id")

data_final <- data_final %>%
  select(ID, T4Cortisol, BIRTH_YEAR, BIRTH_DATE, Sampling_date)

library(lubridate)

data_final$BIRTH_DATE <- ymd(data_final$BIRTH_DATE)

# Calculate age in total days
data_final$Age_days <- as.numeric(difftime(data_final$Sampling_date, data_final$BIRTH_DATE, units = "days"))


# The data below has the the 55 GEBVs + Cortisol data + Birth Year + Sampling data
write.csv(data_final, "/home/bambrozi/2_CORTISOL/Data/data_age_samplingdate.csv")

# Calculate values
average_age <- mean(data_final$Age_days, na.rm = TRUE)
sd_age <- sd(data_final$Age_days, na.rm = TRUE)
median_age <- median(data_final$Age_days, na.rm = TRUE)
range_age <- range(data_final$Age_days, na.rm = TRUE)

# Create a data frame to save
age_summary <- data.frame(
  Metric = c("Mean", "Standard Deviation", "Median", "Min", "Max"),
  Value = c(average_age, sd_age, median_age, range_age[1], range_age[2])
)

# Save to a CSV file
write.csv(age_summary, "/home/bambrozi/2_CORTISOL/Data/age_summary.csv", row.names = FALSE)
Metric Value
Mean 244.86822
Standard Deviation 42.92195
Median 237.50000
Min 162.00000
Max 365.00000

15.0.3 Common enriched QTLs

qtl_pval <- read.csv("/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/out_enrich_qtl_genome_name_pvalue.csv")
qtl_w05 <- read.csv("/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/windows10/out_enrich_qtl_genome_name_w05.csv")
qtl_w01 <- read.csv("/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/windows10/out_enrich_qtl_genome_name_w01.csv")

library(tidyverse)

qtl_pval <- filter(qtl_pval, adj.pval <= 0.05) 
qtl_w05 <- filter(qtl_w05, adj.pval <= 0.05) 
qtl_w01 <- filter(qtl_w01, adj.pval <= 0.05) 

qtl_pval$approach <- "pval"
qtl_w05$approach <- "w05"
qtl_w01$approach <- "w01"

qtl_all_approachs <- rbind(qtl_pval, qtl_w05, qtl_w01)

write.csv(qtl_all_approachs, "/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/enrich_sig_qtl_all_approachs.csv")

qtl_names <- unique(qtl_all_approachs$QTL)

qtl_names <- sort(qtl_names)


# Create a matrix with the desired dimensions
matrix_length <- length(qtl_names)
common_enrich_qtl <- matrix(NA, nrow = matrix_length, ncol = 4)

# Optionally, name the rows and columns
colnames(common_enrich_qtl) <- c("QTL_names", "Single_SNP", "Windows_0.5", "Windows_0.1")

common_enrich_qtl[, "QTL_names"] <- qtl_names

common_enrich_qtl <- as.data.frame(common_enrich_qtl)

# Ensure both vectors have the same length (or handle if they don't)
common_enrich_qtl$Single_SNP <- ifelse(common_enrich_qtl$QTL_names %in% qtl_pval$QTL, "yes", NA)
common_enrich_qtl$Windows_0.5 <- ifelse(common_enrich_qtl$QTL_names %in% qtl_w05$QTL, "yes", NA)
common_enrich_qtl$Windows_0.1 <- ifelse(common_enrich_qtl$QTL_names %in% qtl_w01$QTL, "yes", NA)

write.csv(common_enrich_qtl, "/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/common_enrich_qtl.csv")